#!/usr/bin/python3
# SPDX-FileCopyrightText: 2021 Luca Beltrame
#
# SPDX-License-Identifier: BSD-3-Clause

"""Download video comments and save them as tab-separated text and SRT.

Credentials are read from the ``niconico`` entry of the user's netrc file.
"""

import asyncio
import argparse
from datetime import datetime, timezone
import netrc
import os
from urllib.parse import urlparse

import pandas as pd
import pysrt
from nicotools.download import Comment, Info, utils
import simplejson as json

# Textual timestamp format shared by process_element(), build_srt() and
# CommentStream.saver(). It has no hour field, so positions wrap every hour.
TIME_FORMAT = "%M:%S.%f"

# Name of the netrc entry holding the login credentials.
NETRC_HOST = "niconico"


def process_element(element):
    """Extract the position and text of a single comment entry.

    Args:
        element: Mapping with a ``"chat"`` key whose value provides
            ``"vpos"`` (numeric position) and ``"content"`` (comment text).

    Returns:
        A ``(video_time, comment)`` tuple; ``video_time`` is a string in
        ``MM:SS.ffffff`` form.

    Raises:
        KeyError: If ``"chat"``, ``"vpos"`` or ``"content"`` is missing.
    """
    chat = element["chat"]
    # vpos * 10 is handled as milliseconds below (presumably vpos is
    # expressed in centiseconds -- confirm against the comment API).
    video_pos = chat["vpos"] * 10
    comment = chat["content"]
    # Convert in UTC: a local-time conversion would shift the minutes field
    # in timezones whose UTC offset is not a whole number of hours.
    moment = datetime.fromtimestamp(video_pos / 1000, tz=timezone.utc)
    return (moment.strftime(TIME_FORMAT), comment)


def extract_ids(urls):
    """Return the video ID (last path component) of every URL in *urls*.

    Args:
        urls: Iterable of video URL strings.

    Returns:
        A list with one ID per input URL, in input order; empty when
        *urls* is empty.
    """
    # Strip a trailing slash so ".../watch/sm9/" still yields "sm9".
    return [
        os.path.split(urlparse(url).path.rstrip("/"))[-1]
        for url in urls
    ]


def build_srt(data, duration=2):
    """Build a subtitle file from ``(time, text)`` pairs.

    Args:
        data: Iterable of ``(time, text)`` tuples, with ``time`` formatted
            as ``MM:SS.ffffff`` (the output of :func:`process_element`).
        duration: Number of seconds each subtitle stays on screen.

    Returns:
        A ``pysrt.SubRipFile`` whose cues are in chronological order and
        numbered from 1.
    """
    srt_file = pysrt.SubRipFile()
    # The zero-padded timestamps sort chronologically as plain strings;
    # sorting keeps the SRT consistent with the time-sorted text output.
    ordered = sorted(data, key=lambda entry: entry[0])
    # SRT cue numbers conventionally start at 1.
    for index, (stamp, text) in enumerate(ordered, start=1):
        parsed = datetime.strptime(stamp, TIME_FORMAT)
        millis = parsed.microsecond // 1000
        start = pysrt.SubRipTime(
            minutes=parsed.minute,
            seconds=parsed.second,
            milliseconds=millis,
        )
        end = pysrt.SubRipTime(
            minutes=parsed.minute,
            seconds=parsed.second + duration,
            milliseconds=millis,
        )
        srt_file.append(pysrt.SubRipItem(index, start, end, text=text))
    return srt_file


class CommentStream(Comment):
    """Comment downloader that stores comments as text and, optionally, SRT."""

    # When true, saver() also writes an .srt file next to the text file.
    srt = False

    def saver(self, video_id: str, is_xml: bool,
              coroutine: asyncio.Task) -> bool:
        """Write the downloaded comments of *video_id* to disk.

        Args:
            video_id: Key into ``self.glossary`` identifying the video.
            is_xml: When true, saving is delegated to the parent class.
            coroutine: Finished task whose result is the JSON comment data.

        Returns:
            Always ``True``.
        """
        if is_xml:
            super().saver(video_id, is_xml, coroutine)
            return True

        comment_data = coroutine.result()
        data = json.loads(comment_data)
        # Only entries carrying a "chat" key are comments.
        contents = [process_element(item) for item in data if "chat" in item]

        file_path = utils.make_name(self.glossary[video_id], "",
                                    extention="txt")
        file_srt = utils.make_name(self.glossary[video_id], "",
                                   extention="srt")

        df = pd.DataFrame.from_records(contents, columns=["Time", "Comment"])
        df["Time"] = pd.to_datetime(df["Time"], format=TIME_FORMAT).dt.time
        df = df.sort_values(by="Time")

        with file_path.open("w", encoding="utf-8") as f:
            df.to_csv(f, sep="\t", index=False)

        if self.srt:
            srt_data = build_srt(contents)
            srt_data.save(str(file_srt))

        return True


def main():
    """Parse the command line, log in via netrc and download the comments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("video", help="Video URL(s)", nargs="+")
    parser.add_argument("-d", "--destination", help="Destination directory",
                        default="./")
    # dest="srt" makes the stored value read naturally: True unless
    # --no-srt is given.
    parser.add_argument("--no-srt", dest="srt", action="store_false",
                        help="Don't generate SRT")

    options = parser.parse_args()

    try:
        credentials = netrc.netrc().hosts.get(NETRC_HOST)
    except (FileNotFoundError, netrc.NetrcParseError) as err:
        parser.error(f"unable to read netrc file: {err}")
    if credentials is None:
        parser.error(f'no "{NETRC_HOST}" entry found in netrc file')
    user, _, password = credentials

    video_ids = extract_ids(options.video)
    database = Info(video_ids, mail=user, password=password).info

    com = CommentStream(database, user, password,
                        save_dir=options.destination)
    com.srt = options.srt
    com.start()


if __name__ == "__main__":
    main()