1
0
Fork 0

Support exporting nico comments as SRT

This commit is contained in:
Luca Beltrame 2019-12-29 03:49:43 +01:00
parent e3d662a9fe
commit e1bd6c145a
Signed by: einar
GPG key ID: 8DF631FD021DB0C5

View file

@@ -8,6 +8,7 @@ import os
from urllib.parse import urlparse
import pandas as pd
import pysrt
from nicotools.download import Comment, Info, utils
import simplejson as json
@@ -35,30 +36,58 @@ def extract_ids(urls):
return nico_id
def build_srt(data, duration=2):
    """Build a SubRip subtitle file from timed comments.

    Args:
        data: Iterable of ``(time, text)`` pairs, where ``time`` is a string
            in ``%M:%S.%f`` format (minutes, seconds, fractional seconds).
        duration: Number of seconds each comment stays on screen.
            Defaults to 2.

    Returns:
        A ``pysrt.SubRipFile`` holding one item per comment, ordered by
        start time and numbered from 1.

    Raises:
        ValueError: If a time string does not match ``%M:%S.%f``.
    """
    srt_file = pysrt.SubRipFile()

    for index, (stamp, text) in enumerate(data, start=1):
        # NOTE(review): "%M" only accepts 0-59, so a comment placed at or
        # after the one-hour mark would be rejected here -- confirm that the
        # producer of these time strings never emits such values.
        parsed = datetime.strptime(stamp, "%M:%S.%f")
        # Integer division keeps the millisecond component an int.
        millis = parsed.microsecond // 1000

        start = pysrt.SubRipTime(minutes=parsed.minute,
                                 seconds=parsed.second,
                                 milliseconds=millis)
        # seconds may exceed 59 here; this relies on SubRipTime normalising
        # the overflow into minutes -- TODO confirm against pysrt.
        end = pysrt.SubRipTime(minutes=parsed.minute,
                               seconds=parsed.second + duration,
                               milliseconds=millis)

        srt_file.append(pysrt.SubRipItem(index, start, end, text=text))

    # Comments are not guaranteed to arrive in playback order; sort the cues
    # and renumber them sequentially starting at 1.
    srt_file.clean_indexes()

    return srt_file
class CommentStream(Comment):
    """Comment downloader that saves JSON comments as TSV and, optionally, SRT.

    XML comments are handed to the parent ``Comment.saver`` unchanged.
    """

    # When true, saver() also writes an .srt file next to the .txt file.
    srt = False

    def saver(self, video_id: str, is_xml: bool,
              coroutine: asyncio.Task) -> bool:
        """Write the downloaded comments of one video to disk.

        Args:
            video_id: Key into ``self.glossary`` for the video being saved.
            is_xml: Whether the comments were fetched as XML; if so the
                parent implementation does the saving.
            coroutine: Finished task whose result is the raw comment payload.

        Returns:
            Always ``True``.
        """
        if is_xml:
            super().saver(video_id, is_xml, coroutine)
            return True

        comment_data = coroutine.result()

        data = json.loads(comment_data)
        # Only entries containing a "chat" key are turned into records.
        contents = [process_element(item)
                    for item in data if "chat" in item]

        # "extention" (sic) is the keyword spelling used by the call into
        # nicotools' utils.make_name; do not "correct" it.
        file_path = utils.make_name(self.glossary[video_id], "",
                                    extention="txt")
        file_srt = utils.make_name(self.glossary[video_id], "",
                                   extention="srt")

        df = pd.DataFrame.from_records(contents,
                                       columns=["Time", "Comment"])
        df["Time"] = pd.to_datetime(df["Time"],
                                    format="%M:%S.%f").dt.time
        df = df.sort_values(by="Time")

        with file_path.open("w", encoding="utf-8") as f:
            df.to_csv(f, sep="\t", index=False)

        if self.srt:
            srt_data = build_srt(contents)
            srt_data.save(str(file_srt))

        return True
@@ -69,6 +98,8 @@ def main():
parser.add_argument("video", help="Video URL(s)", nargs="+")
parser.add_argument("-d", "--destination", help="Destination directory",
default="./")
parser.add_argument("--no-srt", action="store_false",
help="Don't generate SRT")
options = parser.parse_args()
user, _, password = netrc.netrc().hosts["niconico"]
@@ -76,6 +107,7 @@ def main():
database = Info(video_ids, mail=user, password=password).info
com = CommentStream(database, user, password, save_dir=options.destination)
com.srt = options.no_srt
com.start()