1
0
Fork 0

Support exporting nico comments as SRT

This commit is contained in:
Luca Beltrame 2019-12-29 03:49:43 +01:00
parent e3d662a9fe
commit e1bd6c145a
Signed by: einar
GPG key ID: 8DF631FD021DB0C5

View file

@ -8,6 +8,7 @@ import os
from urllib.parse import urlparse from urllib.parse import urlparse
import pandas as pd import pandas as pd
import pysrt
from nicotools.download import Comment, Info, utils from nicotools.download import Comment, Info, utils
import simplejson as json import simplejson as json
@ -35,20 +36,45 @@ def extract_ids(urls):
return nico_id return nico_id
def build_srt(data, duration=2):
    """Build a SubRip subtitle file from timestamped comments.

    Args:
        data: Iterable of ``(timestamp, text)`` pairs. Each timestamp is a
            string in ``"%M:%S.%f"`` form (minutes:seconds.fraction).
        duration: Number of seconds each comment stays on screen.
            Defaults to 2.

    Returns:
        A ``pysrt.SubRipFile`` holding one item per comment, ordered by
        start time and numbered from 1.

    Raises:
        ValueError: If a timestamp does not match ``"%M:%S.%f"``. Note that
            ``%M`` only accepts 00-59, so offsets of an hour or more are
            rejected by ``strptime``.
    """
    # Imported locally so this function does not depend on a module-level
    # ``datetime`` import, which is not visible from this part of the file.
    from datetime import datetime

    # Parse everything up front so cues can be emitted chronologically;
    # the incoming comments are not guaranteed to be in playback order.
    parsed = [
        (datetime.strptime(stamp, "%M:%S.%f"), text) for stamp, text in data
    ]
    parsed.sort(key=lambda pair: pair[0])

    srt_file = pysrt.SubRipFile()
    # SubRip cue counters conventionally start at 1, not 0.
    for index, (moment, text) in enumerate(parsed, start=1):
        # Integer division keeps the millisecond component an int.
        millis = moment.microsecond // 1000
        subtime_st = pysrt.SubRipTime(minutes=moment.minute,
                                      seconds=moment.second,
                                      milliseconds=millis)
        subtime_end = pysrt.SubRipTime(minutes=moment.minute,
                                       seconds=moment.second + duration,
                                       milliseconds=millis)
        srt_file.append(
            pysrt.SubRipItem(index, subtime_st, subtime_end, text=text))

    return srt_file
class CommentStream(Comment): class CommentStream(Comment):
srt = False
def saver(self, video_id: str, is_xml: bool, def saver(self, video_id: str, is_xml: bool,
coroutine: asyncio.Task) -> bool: coroutine: asyncio.Task) -> bool:
if is_xml:
super().saver(video_id, is_xml, coroutine)
return True
comment_data = coroutine.result() comment_data = coroutine.result()
conts = list() conts = list()
if not is_xml:
data = json.loads(comment_data) data = json.loads(comment_data)
contents = [process_element(item) contents = [process_element(item)
for item in data if "chat" in item] for item in data if "chat" in item]
file_path = utils.make_name(self.glossary[video_id], "", file_path = utils.make_name(self.glossary[video_id], "",
extention="txt") extention="txt")
file_srt = utils.make_name(self.glossary[video_id], "",
extention="srt")
df = pd.DataFrame.from_records(contents, df = pd.DataFrame.from_records(contents,
columns=["Time", "Comment"]) columns=["Time", "Comment"])
@ -56,10 +82,13 @@ class CommentStream(Comment):
format="%M:%S.%f").dt.time format="%M:%S.%f").dt.time
df = df.sort_values(by="Time") df = df.sort_values(by="Time")
with file_path.open("w", encoding="utf-8") as f: with file_path.open("w", encoding="utf-8") as f:
df.to_csv(f, sep="\t", index=False) df.to_csv(f, sep="\t", index=False)
if self.srt:
srt_data = build_srt(contents)
srt_data.save(str(file_srt))
return True return True
@ -69,6 +98,8 @@ def main():
parser.add_argument("video", help="Video URL(s)", nargs="+") parser.add_argument("video", help="Video URL(s)", nargs="+")
parser.add_argument("-d", "--destination", help="Destination directory", parser.add_argument("-d", "--destination", help="Destination directory",
default="./") default="./")
parser.add_argument("--no-srt", action="store_false",
help="Don't generate SRT")
options = parser.parse_args() options = parser.parse_args()
user, _, password = netrc.netrc().hosts["niconico"] user, _, password = netrc.netrc().hosts["niconico"]
@ -76,6 +107,7 @@ def main():
database = Info(video_ids, mail=user, password=password).info database = Info(video_ids, mail=user, password=password).info
com = CommentStream(database, user, password, save_dir=options.destination) com = CommentStream(database, user, password, save_dir=options.destination)
com.srt = options.no_srt
com.start() com.start()