diff --git a/nico_comment.py b/nico_comment.py
index 34b4b1d..79b8ecc 100755
--- a/nico_comment.py
+++ b/nico_comment.py
@@ -8,6 +8,7 @@ import os
 from urllib.parse import urlparse
 
 import pandas as pd
+import pysrt
 from nicotools.download import Comment, Info, utils
 import simplejson as json
 
@@ -35,30 +36,72 @@ def extract_ids(urls):
     return nico_id
 
 
+def build_srt(data):
+    """Build a pysrt.SubRipFile from (time, text) comment pairs.
+
+    Each time string is parsed with "%M:%S.%f"; every cue lasts two seconds
+    from that start time.
+    """
+    # NOTE(review): local import guarantees `datetime` is the class here;
+    # hoist to module level if the file already imports it that way.
+    from datetime import datetime
+
+    srt_file = pysrt.SubRipFile()
+    for index, proc in enumerate(data, start=1):
+        time, text = proc
+        time = datetime.strptime(time, "%M:%S.%f")
+        subtime_st = pysrt.SubRipTime(minutes=time.minute, seconds=time.second,
+                                      milliseconds=time.microsecond // 1000)
+        subtime_end = pysrt.SubRipTime(minutes=time.minute,
+                                       seconds=time.second + 2,
+                                       milliseconds=time.microsecond // 1000)
+        entry = pysrt.SubRipItem(index, subtime_st, subtime_end, text=text)
+        srt_file.append(entry)
+
+    # Sort cues by start time and renumber them from 1, as SRT expects.
+    srt_file.clean_indexes()
+    return srt_file
+
+
 class CommentStream(Comment):
 
+    # When true, saver() also writes an .srt file next to the .txt dump.
+    srt = False
+
     def saver(self, video_id: str, is_xml: bool,
               coroutine: asyncio.Task) -> bool:
+        """Save comments as a tab-separated .txt and, optionally, an .srt.
+
+        XML payloads are delegated to the parent implementation.
+        """
+        if is_xml:
+            # Propagate the parent's result rather than always returning True.
+            return super().saver(video_id, is_xml, coroutine)
+
         comment_data = coroutine.result()
         conts = list()
 
-        if not is_xml:
-            data = json.loads(comment_data)
-            contents = [process_element(item)
-                        for item in data if "chat" in item]
+        data = json.loads(comment_data)
+        contents = [process_element(item)
+                    for item in data if "chat" in item]
 
-            file_path = utils.make_name(self.glossary[video_id], "",
-                                        extention="txt")
+        file_path = utils.make_name(self.glossary[video_id], "",
+                                    extention="txt")
+        file_srt = utils.make_name(self.glossary[video_id], "",
+                                   extention="srt")
 
-            df = pd.DataFrame.from_records(contents,
-                                           columns=["Time", "Comment"])
-            df["Time"] = pd.to_datetime(df["Time"],
-                                        format="%M:%S.%f").dt.time
-            df = df.sort_values(by="Time")
+        df = pd.DataFrame.from_records(contents,
+                                       columns=["Time", "Comment"])
+        df["Time"] = pd.to_datetime(df["Time"],
+                                    format="%M:%S.%f").dt.time
+        df = df.sort_values(by="Time")
+        with file_path.open("w", encoding="utf-8") as f:
+            df.to_csv(f, sep="\t", index=False)
 
-            with file_path.open("w", encoding="utf-8") as f:
-                df.to_csv(f, sep="\t", index=False)
+        if self.srt:
+            srt_data = build_srt(contents)
+            srt_data.save(str(file_srt), encoding="utf-8")
 
         return True
 
 
@@ -69,6 +112,8 @@ def main():
     parser.add_argument("video", help="Video URL(s)", nargs="+")
     parser.add_argument("-d", "--destination", help="Destination directory",
                         default="./")
+    parser.add_argument("--no-srt", action="store_false", dest="srt",
+                        help="Don't generate SRT")
     options = parser.parse_args()
 
     user, _, password = netrc.netrc().hosts["niconico"]
@@ -76,6 +121,7 @@ def main():
 
     database = Info(video_ids, mail=user, password=password).info
     com = CommentStream(database, user, password, save_dir=options.destination)
+    com.srt = options.srt
     com.start()
 
 