From 17da3fbdeb2f53de0d04cac8d14910dc89b96d0b Mon Sep 17 00:00:00 2001
From: Luca Beltrame
Date: Thu, 3 Jan 2019 12:49:53 +0100
Subject: [PATCH] Convert nico comments

---
 nico_comment.py | 99 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100755 nico_comment.py

diff --git a/nico_comment.py b/nico_comment.py
new file mode 100755
index 0000000..34b4b1d
--- /dev/null
+++ b/nico_comment.py
@@ -0,0 +1,99 @@
+#!/usr/bin/python3
+"""Download Niconico video comments and save them as tab-separated text."""
+
+import asyncio
+import argparse
+from datetime import datetime, timezone
+import netrc
+import os
+from urllib.parse import urlparse
+
+import pandas as pd
+from nicotools.download import Comment, Info, utils
+import simplejson as json
+
+# Shared by process_element() and CommentStream.saver(): the former formats
+# with it and the latter parses with it, so the two must stay in sync.
+TIME_FORMAT = "%H:%M:%S.%f"
+
+
+def process_element(element):
+    """Turn one comment entry into a ``(video_time, comment)`` tuple.
+
+    ``element`` must have a "chat" mapping with "vpos" and "content" keys.
+    "vpos" is multiplied by 10 to get milliseconds (so it is presumably
+    expressed in hundredths of a second -- confirm against the API).
+    """
+    chat = element["chat"]
+
+    video_pos = chat["vpos"] * 10
+    comment = chat["content"]
+    # Convert in UTC: a local-time conversion shifts the result by the
+    # machine's UTC offset, which corrupts the minutes in zones such as
+    # +05:30.  Hours are kept so positions past 59:59 do not wrap around.
+    video_time = datetime.fromtimestamp(
+        video_pos / 1000, tz=timezone.utc).strftime(TIME_FORMAT)
+
+    return (video_time, comment)
+
+
+def extract_ids(urls):
+    """Return the video ID (last path component) of every URL in ``urls``."""
+    # rstrip("/") keeps a trailing slash from producing an empty ID.
+    return [os.path.basename(urlparse(url).path.rstrip("/")) for url in urls]
+
+
+class CommentStream(Comment):
+    """``Comment`` downloader that stores JSON comments as a TSV table."""
+
+    def saver(self, video_id: str, is_xml: bool,
+              coroutine: asyncio.Task) -> bool:
+        """Write the comments fetched by ``coroutine`` for ``video_id``.
+
+        JSON payloads are reduced to "Time"/"Comment" columns, sorted by
+        time and written tab-separated to a ``txt`` file.  XML payloads
+        are handed to the parent class' ``saver`` unchanged.
+        """
+        if is_xml:
+            # Only the JSON layout is understood here; this path used to
+            # crash with a NameError on the undefined ``contents``.
+            return super().saver(video_id, is_xml, coroutine)
+
+        data = json.loads(coroutine.result())
+        contents = [process_element(item) for item in data if "chat" in item]
+
+        file_path = utils.make_name(self.glossary[video_id], "",
+                                    extention="txt")
+
+        df = pd.DataFrame.from_records(contents, columns=["Time", "Comment"])
+        df["Time"] = pd.to_datetime(df["Time"], format=TIME_FORMAT).dt.time
+        df = df.sort_values(by="Time")
+
+        with file_path.open("w", encoding="utf-8") as f:
+            df.to_csv(f, sep="\t", index=False)
+
+        return True
+
+
+def main():
+    """Parse the command line, read credentials from netrc, fetch comments."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("video", help="Video URL(s)", nargs="+")
+    parser.add_argument("-d", "--destination", help="Destination directory",
+                        default="./")
+
+    options = parser.parse_args()
+
+    credentials = netrc.netrc().hosts.get("niconico")
+    if credentials is None:
+        parser.error('no "niconico" machine entry found in the netrc file')
+    user, _, password = credentials
+    video_ids = extract_ids(options.video)
+
+    database = Info(video_ids, mail=user, password=password).info
+    com = CommentStream(database, user, password, save_dir=options.destination)
+    com.start()
+
+
+if __name__ == "__main__":
+    main()