1
0
Fork 0
scripts/misc/nico_comment.py
Luca Beltrame c4f7279f2e
Restructure directory layout
To make this better than the unorganized mess it used to be.
2021-01-03 15:26:29 +01:00

119 lines
3.3 KiB
Python
Executable file

#!/usr/bin/python3
# SPDX-FileCopyrightText: 2021 Luca Beltrame <lbeltrame@kde.org>
#
# SPDX-License-Identifier: BSD-3-Clause
import asyncio
import argparse
from datetime import datetime
import netrc
import os
from urllib.parse import urlparse
import pandas as pd
import pysrt
from nicotools.download import Comment, Info, utils
import simplejson as json
def process_element(element):
    """Turn one raw comment record into a ``(video_time, comment)`` pair.

    ``element`` is a mapping with a ``"chat"`` entry holding ``"vpos"`` (the
    comment position, treated here as hundredths of a second) and
    ``"content"`` (the comment text).

    Returns a tuple ``(video_time, comment)`` where ``video_time`` is a
    string formatted as ``MM:SS.ffffff``.  The minute field wraps at 60
    because the consumers of this value parse it with ``%M:%S.%f``.
    """
    chat = element["chat"]
    # vpos * 10 gives milliseconds.
    total_ms = int(round(chat["vpos"] * 10))
    # Plain integer arithmetic instead of datetime.fromtimestamp(): the
    # latter converts to *local* time, which shifts the minute field in
    # time zones whose UTC offset is not a whole number of hours.
    total_seconds, millis = divmod(total_ms, 1000)
    minutes, seconds = divmod(total_seconds, 60)
    video_time = "{:02d}:{:02d}.{:06d}".format(minutes % 60, seconds,
                                               millis * 1000)
    return (video_time, chat["content"])
def extract_ids(urls):
    """Return the list of video IDs taken from ``urls``.

    The ID of each URL is the last component of its path; the query string
    and any trailing slash are ignored.  A bare ID (no scheme or host) is
    returned unchanged.  An empty ``urls`` yields an empty list.
    """
    video_ids = []
    for url in urls:
        path = urlparse(url).path
        # Strip a trailing "/" first, otherwise the last component is "".
        video_ids.append(path.rstrip("/").rsplit("/", 1)[-1])
    # Return every collected ID, not just the one from the last iteration.
    return video_ids
def build_srt(data):
    """Build a ``pysrt.SubRipFile`` from ``(time, text)`` pairs.

    ``data`` is an iterable of ``(time, text)`` tuples where ``time`` is a
    string in ``%M:%S.%f`` format.  Each pair becomes one subtitle that
    starts at ``time`` and ends two seconds later.  Subtitles are emitted
    in chronological order and numbered from 1.

    Returns the populated ``pysrt.SubRipFile``.
    """
    srt_file = pysrt.SubRipFile()
    # Parse every timestamp once and order the cues by start time; sorted()
    # is stable, so comments posted at the same instant keep their order.
    cues = sorted(
        ((datetime.strptime(stamp, "%M:%S.%f"), text) for stamp, text in data),
        key=lambda cue: cue[0],
    )
    for index, (start, text) in enumerate(cues, start=1):
        # Integer milliseconds rather than the float that "/" would yield.
        millis = start.microsecond // 1000
        subtime_st = pysrt.SubRipTime(minutes=start.minute,
                                      seconds=start.second,
                                      milliseconds=millis)
        # seconds may exceed 59 here.
        # NOTE(review): relies on SubRipTime carrying the overflow into
        # the minutes field - confirm against the pysrt version in use.
        subtime_end = pysrt.SubRipTime(minutes=start.minute,
                                       seconds=start.second + 2,
                                       milliseconds=millis)
        entry = pysrt.SubRipItem(index, subtime_st, subtime_end, text=text)
        srt_file.append(entry)
    return srt_file
class CommentStream(Comment):
    """``Comment`` subclass that stores JSON comments as a tab-separated
    table and, optionally, as an SRT subtitle file."""

    # When true, saver() also writes an .srt file for each video.
    srt = False

    def saver(self, video_id: str, is_xml: bool,
              coroutine: asyncio.Task) -> bool:
        """Write the downloaded comments of ``video_id`` to disk.

        XML downloads are delegated to the parent implementation.  For
        JSON downloads, every item containing a ``"chat"`` key is
        converted with ``process_element`` and the result is written,
        sorted by time, to a tab-separated ``.txt`` file with the columns
        ``Time`` and ``Comment``.  If ``self.srt`` is true an ``.srt``
        file is written as well.

        ``coroutine`` is the finished task whose result is the raw
        comment payload.  Always returns ``True``.
        """
        if is_xml:
            super().saver(video_id, is_xml, coroutine)
            return True

        data = json.loads(coroutine.result())
        contents = [process_element(item)
                    for item in data if "chat" in item]

        # NOTE(review): self.glossary is presumably provided by the parent
        # class and maps video IDs to the names make_name() expects.
        file_path = utils.make_name(self.glossary[video_id], "",
                                    extention="txt")
        file_srt = utils.make_name(self.glossary[video_id], "",
                                   extention="srt")

        df = pd.DataFrame.from_records(contents,
                                       columns=["Time", "Comment"])
        # Convert to time objects so the rows can be ordered by time.
        df["Time"] = pd.to_datetime(df["Time"],
                                    format="%M:%S.%f").dt.time
        df = df.sort_values(by="Time")

        with file_path.open("w", encoding="utf-8") as f:
            df.to_csv(f, sep="\t", index=False)

        if self.srt:
            srt_data = build_srt(contents)
            srt_data.save(str(file_srt))

        return True
def main():
    """Parse the command line and download the comments of each video.

    Credentials are read from the ``niconico`` entry of the user's netrc
    file.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("video", help="Video URL(s)", nargs="+")
    arg_parser.add_argument("-d", "--destination", default="./",
                            help="Destination directory")
    # store_false: args.no_srt is True unless --no-srt is given, so it can
    # be used directly as the "generate SRT" switch below.
    arg_parser.add_argument("--no-srt", action="store_false",
                            help="Don't generate SRT")
    args = arg_parser.parse_args()

    # The middle field of the netrc entry is not used.
    login, _, secret = netrc.netrc().hosts["niconico"]

    ids = extract_ids(args.video)
    database = Info(ids, mail=login, password=secret).info

    downloader = CommentStream(database, login, secret,
                               save_dir=args.destination)
    downloader.srt = args.no_srt
    downloader.start()
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()