#!/usr/bin/python3

# SPDX-FileCopyrightText: 2021 Luca Beltrame <lbeltrame@kde.org>
#
# SPDX-License-Identifier: BSD-3-Clause

import asyncio
|
|
import argparse
|
|
from datetime import datetime
|
|
import netrc
|
|
import os
|
|
from urllib.parse import urlparse
|
|
|
|
import pandas as pd
|
|
import pysrt
|
|
from nicotools.download import Comment, Info, utils
|
|
import simplejson as json
|
|
|
|
|
|
def process_element(element):
    """Convert one raw comment record into a ``(time, text)`` pair.

    Args:
        element: Mapping with a ``"chat"`` entry that holds the comment's
            ``"vpos"`` (playback position) and ``"content"`` (comment text).

    Returns:
        A tuple ``(video_time, comment)`` where ``video_time`` is the
        playback position formatted as ``MM:SS.ffffff``.
    """
    chat = element["chat"]
    comment = chat["content"]

    # ``vpos * 10`` is treated as milliseconds, i.e. ``vpos * 10_000`` is
    # microseconds. Rounding keeps non-integer positions usable.
    position_us = round(chat["vpos"] * 10_000)

    # Plain arithmetic keeps the result independent of the machine's local
    # time zone; going through ``datetime.fromtimestamp`` shifts the minutes
    # wherever the UTC offset is not a whole number of hours.
    total_seconds, micros = divmod(position_us, 1_000_000)
    total_minutes, seconds = divmod(total_seconds, 60)

    # Minutes wrap at the hour because this string is later parsed with the
    # "%M:%S.%f" format, which has no hour field.
    minutes = total_minutes % 60
    video_time = f"{minutes:02d}:{seconds:02d}.{micros:06d}"

    return (video_time, comment)
|
|
|
|
|
|
def extract_ids(urls):
    """Extract the video identifier from each URL.

    The identifier is the final component of the URL path, so any query
    string or fragment is ignored.

    Args:
        urls: Iterable of video URL strings.

    Returns:
        A list with one identifier per input URL, in input order. An empty
        input yields an empty list.
    """
    # Return the whole list: handing back only the last identifier dropped
    # every other video and raised NameError when no URL was given.
    # Trailing slashes are stripped so ".../watch/sm9/" still yields "sm9".
    return [
        os.path.split(urlparse(url).path.rstrip("/"))[-1]
        for url in urls
    ]
|
|
|
|
|
|
def build_srt(data):
    """Build a subtitle file from ``(time, text)`` pairs.

    Args:
        data: Iterable of ``(time, text)`` tuples where ``time`` is a string
            that parses with the ``"%M:%S.%f"`` format.

    Returns:
        A ``pysrt.SubRipFile`` holding one entry per pair, ordered by start
        time and numbered from 1. Each entry ends two seconds after it
        starts.
    """
    # Parse everything first so the cues can be ordered by start time; the
    # sort is stable, so cues sharing a timestamp keep their input order.
    cues = sorted(
        ((datetime.strptime(stamp, "%M:%S.%f"), text) for stamp, text in data),
        key=lambda cue: cue[0],
    )

    srt_file = pysrt.SubRipFile()
    # SubRip cue numbering conventionally starts at 1, not 0.
    for index, (start, text) in enumerate(cues, start=1):
        millis = start.microsecond // 1000
        subtime_st = pysrt.SubRipTime(minutes=start.minute,
                                      seconds=start.second,
                                      milliseconds=millis)
        # ``seconds`` may exceed 59 here; this relies on SubRipTime carrying
        # the overflow into minutes, as the original code did.
        subtime_end = pysrt.SubRipTime(minutes=start.minute,
                                       seconds=start.second + 2,
                                       milliseconds=millis)
        entry = pysrt.SubRipItem(index, subtime_st, subtime_end, text=text)
        srt_file.append(entry)

    return srt_file
|
|
|
|
|
|
class CommentStream(Comment):
    """Comment downloader that stores non-XML comments as TSV and SRT files."""

    # When true, ``saver`` also writes an SRT subtitle file in addition to
    # the tab-separated text file.
    srt = False

    def saver(self, video_id: str, is_xml: bool,
              coroutine: asyncio.Task) -> bool:
        """Save the downloaded comments of one video.

        XML comments are delegated unchanged to the parent class. Otherwise
        the task result is parsed as JSON, every item containing a ``"chat"``
        key is converted with ``process_element``, and the rows are written
        as a tab-separated file sorted by time. If ``self.srt`` is true an
        SRT file is written as well.

        Args:
            video_id: Key into ``self.glossary`` for the video being saved.
            is_xml: Whether the comment data is XML.
            coroutine: Finished task whose result is the comment payload.

        Returns:
            Always ``True``.
        """
        if is_xml:
            super().saver(video_id, is_xml, coroutine)
            return True

        comment_data = coroutine.result()
        data = json.loads(comment_data)
        # Items without a "chat" key are skipped.
        contents = [process_element(item)
                    for item in data if "chat" in item]

        # NOTE(review): the ``extention`` spelling is kept from the original
        # call; presumably it matches the helper's keyword name -- confirm.
        file_path = utils.make_name(self.glossary[video_id], "",
                                    extention="txt")
        file_srt = utils.make_name(self.glossary[video_id], "",
                                   extention="srt")

        df = pd.DataFrame.from_records(contents,
                                       columns=["Time", "Comment"])
        # Convert the time strings to ``datetime.time`` values so the rows
        # sort chronologically.
        df["Time"] = pd.to_datetime(df["Time"],
                                    format="%M:%S.%f").dt.time
        df = df.sort_values(by="Time")

        with file_path.open("w", encoding="utf-8") as f:
            df.to_csv(f, sep="\t", index=False)

        if self.srt:
            srt_data = build_srt(contents)
            srt_data.save(str(file_srt))

        return True
|
|
|
|
|
|
def main():
    """Parse the command line and download comments for the given videos.

    The login name and password are read from the ``niconico`` entry of the
    user's netrc file. A missing or unreadable netrc file, or a missing
    entry, is reported as a command-line error instead of a traceback.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("video", help="Video URL(s)", nargs="+")
    parser.add_argument("-d", "--destination", help="Destination directory",
                        default="./")
    # Stored as ``options.srt``: true by default, false when --no-srt is given.
    parser.add_argument("--no-srt", dest="srt", action="store_false",
                        help="Don't generate SRT")

    options = parser.parse_args()

    try:
        credentials = netrc.netrc().hosts.get("niconico")
    except (OSError, netrc.NetrcParseError) as exc:
        # parser.error() prints the message and exits with status 2.
        parser.error(f"cannot read netrc credentials: {exc}")
    if credentials is None:
        parser.error('no "niconico" entry found in the netrc file')
    user, _, password = credentials

    video_ids = extract_ids(options.video)

    database = Info(video_ids, mail=user, password=password).info
    com = CommentStream(database, user, password, save_dir=options.destination)
    com.srt = options.srt
    com.start()
|
|
|
|
|
|
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
|