1
0
Fork 0

Convert nico comments

This commit is contained in:
Luca Beltrame 2019-01-03 12:49:53 +01:00
parent cfa95b82f4
commit 17da3fbdeb
Signed by: einar
GPG key ID: 8DF631FD021DB0C5

83
nico_comment.py Executable file
View file

@ -0,0 +1,83 @@
#!/usr/bin/python3
import argparse
import asyncio
from datetime import datetime, timezone
import netrc
import os
from urllib.parse import urlparse

from nicotools.download import Comment, Info, utils
import pandas as pd
import simplejson as json
def process_element(element):
    """Convert one raw comment entry into a ``(time, text)`` pair.

    Args:
        element: Mapping with a ``"chat"`` entry that holds at least
            ``"vpos"`` (the comment position; it is multiplied by 10 to
            obtain milliseconds, so presumably hundredths of a second)
            and ``"content"`` (the comment text).

    Returns:
        Tuple ``(video_time, comment)`` where ``video_time`` is the
        position formatted as ``MM:SS.ffffff``.  The format has no hour
        field, so the minutes wrap around at 60.

    Raises:
        KeyError: If ``"chat"``, ``"vpos"`` or ``"content"`` is missing.
    """
    chat = element["chat"]
    position_ms = chat["vpos"] * 10
    # Convert in UTC: the local-time default would shift the minutes field
    # in time zones whose UTC offset is not a whole number of hours.
    position = datetime.fromtimestamp(position_ms / 1000, tz=timezone.utc)
    return (position.strftime("%M:%S.%f"), chat["content"])
def extract_ids(urls):
    """Extract the video ID (last path component) from each URL.

    Args:
        urls: Iterable of video URLs; a bare ID is returned unchanged.

    Returns:
        List with one ID per input, in input order (empty for no input).
    """
    # Only the URL path is inspected, so query strings and fragments are
    # ignored.  rstrip("/") makes ".../watch/sm9/" yield "sm9", not "".
    return [urlparse(url).path.rstrip("/").rsplit("/", 1)[-1] for url in urls]
class CommentStream(Comment):
    """``Comment`` subclass whose ``saver`` writes JSON comment data as a
    time-sorted, tab-separated table."""

    def saver(self, video_id: str, is_xml: bool,
              coroutine: asyncio.Task) -> bool:
        """Write the comments of one video to a tab-separated text file.

        Args:
            video_id: Key into ``self.glossary`` for the video.
            is_xml: When false the task result is parsed as JSON; when
                true nothing is converted (see Returns).
            coroutine: Task whose ``result()`` is the raw comment data.

        Returns:
            ``True`` once the table has been written, ``False`` when
            ``is_xml`` is set because only the JSON layout is understood.
        """
        # Fetch the result first so an exception stored in the task
        # still propagates to the caller.
        comment_data = coroutine.result()

        if is_xml:
            # The conversion below needs JSON; previously this path left
            # ``contents`` unbound.  Report "not saved" explicitly.
            return False

        data = json.loads(comment_data)
        # Entries without a "chat" key are skipped.
        contents = [process_element(item) for item in data if "chat" in item]

        # "extention" (sic) is the keyword name used by the original call;
        # presumably it matches the spelling in nicotools' utils.make_name.
        file_path = utils.make_name(self.glossary[video_id], "",
                                    extention="txt")

        df = pd.DataFrame.from_records(contents, columns=["Time", "Comment"])
        # Parse the "MM:SS.ffffff" strings into time objects before sorting.
        df["Time"] = pd.to_datetime(df["Time"], format="%M:%S.%f").dt.time
        df = df.sort_values(by="Time")

        with file_path.open("w", encoding="utf-8") as f:
            df.to_csv(f, sep="\t", index=False)
        return True
def main():
    """Command-line entry point.

    Accepts one or more video URLs and an optional ``-d/--destination``
    directory (default ``./``), reads the login data from the
    ``niconico`` entry of the user's netrc file, and starts a
    ``CommentStream`` for the extracted video IDs.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("video", nargs="+", help="Video URL(s)")
    arg_parser.add_argument("-d", "--destination", default="./",
                            help="Destination directory")
    args = arg_parser.parse_args()

    # netrc entries are (login, account, password); the account is unused.
    login, _account, secret = netrc.netrc().hosts["niconico"]

    ids = extract_ids(args.video)
    video_info = Info(ids, mail=login, password=secret).info
    stream = CommentStream(video_info, login, secret,
                           save_dir=args.destination)
    stream.start()


if __name__ == "__main__":
    main()