Support exporting nico comments as SRT
This commit is contained in:
parent
e3d662a9fe
commit
e1bd6c145a
1 changed file with 45 additions and 13 deletions
|
@ -8,6 +8,7 @@ import os
|
|||
from urllib.parse import urlparse
|
||||
|
||||
import pandas as pd
|
||||
import pysrt
|
||||
from nicotools.download import Comment, Info, utils
|
||||
import simplejson as json
|
||||
|
||||
|
@ -35,30 +36,58 @@ def extract_ids(urls):
|
|||
return nico_id
|
||||
|
||||
|
||||
def build_srt(data, duration=2):
    """Build a ``pysrt.SubRipFile`` from timestamped comments.

    Args:
        data: Iterable of two-item records ``(time, text)``. ``time`` is a
            string parsed with the ``"%M:%S.%f"`` format (minutes, seconds,
            fractional seconds); ``text`` becomes the subtitle body.
        duration: Number of seconds each comment stays on screen.
            Defaults to 2.

    Returns:
        A ``pysrt.SubRipFile`` whose items are ordered by start time and
        numbered from 1.

    Raises:
        ValueError: If a timestamp does not match ``"%M:%S.%f"`` or a record
            does not unpack into exactly two values.
    """
    # Parse every timestamp first so the entries can be ordered before they
    # are numbered; the sort is stable, so simultaneous comments keep their
    # incoming order.
    parsed = [(datetime.strptime(stamp, "%M:%S.%f"), text)
              for stamp, text in data]
    parsed.sort(key=lambda pair: pair[0])

    srt_file = pysrt.SubRipFile()
    # SRT counters conventionally start at 1, not 0.
    for index, (moment, text) in enumerate(parsed, start=1):
        # Integer division keeps the millisecond field an int rather than
        # a float.
        millis = moment.microsecond // 1000
        subtime_st = pysrt.SubRipTime(minutes=moment.minute,
                                      seconds=moment.second,
                                      milliseconds=millis)
        # The seconds value may exceed 59 here; this relies on SubRipTime
        # carrying the overflow into minutes, as the original code did.
        subtime_end = pysrt.SubRipTime(minutes=moment.minute,
                                       seconds=moment.second + duration,
                                       milliseconds=millis)
        srt_file.append(
            pysrt.SubRipItem(index, subtime_st, subtime_end, text=text))

    return srt_file
|
||||
|
||||
|
||||
class CommentStream(Comment):
    """Comment downloader that saves JSON comments as text and, optionally, SRT."""

    # When true, ``saver`` also writes an ``.srt`` file next to the text file.
    srt = False

    def saver(self, video_id: str, is_xml: bool,
              coroutine: asyncio.Task) -> bool:
        """Persist the downloaded comments for one video.

        XML comments are handed to the parent class unchanged. JSON comments
        are parsed, every entry containing a ``"chat"`` key is passed through
        ``process_element``, and the result is written as a tab-separated
        text file sorted by time. If ``self.srt`` is true, an SRT file is
        written as well.

        Args:
            video_id: Key into ``self.glossary`` for the video being saved.
            is_xml: Whether the downloaded comment data is XML.
            coroutine: Finished task whose ``result()`` is the comment data.

        Returns:
            Always ``True``.
        """
        if is_xml:
            super().saver(video_id, is_xml, coroutine)
            return True

        comment_data = coroutine.result()
        data = json.loads(comment_data)
        # NOTE(review): process_element presumably yields (time, comment)
        # pairs with time formatted as "%M:%S.%f" -- confirm against its
        # definition.
        contents = [process_element(item)
                    for item in data if "chat" in item]

        file_path = utils.make_name(self.glossary[video_id], "",
                                    extention="txt")
        file_srt = utils.make_name(self.glossary[video_id], "",
                                   extention="srt")

        df = pd.DataFrame.from_records(contents,
                                       columns=["Time", "Comment"])
        df["Time"] = pd.to_datetime(df["Time"],
                                    format="%M:%S.%f").dt.time
        df = df.sort_values(by="Time")

        with file_path.open("w", encoding="utf-8") as f:
            df.to_csv(f, sep="\t", index=False)

        if self.srt:
            srt_data = build_srt(contents)
            srt_data.save(str(file_srt))

        return True
|
||||
|
||||
|
@ -69,6 +98,8 @@ def main():
|
|||
parser.add_argument("video", help="Video URL(s)", nargs="+")
|
||||
parser.add_argument("-d", "--destination", help="Destination directory",
|
||||
default="./")
|
||||
parser.add_argument("--no-srt", action="store_false",
|
||||
help="Don't generate SRT")
|
||||
|
||||
options = parser.parse_args()
|
||||
user, _, password = netrc.netrc().hosts["niconico"]
|
||||
|
@ -76,6 +107,7 @@ def main():
|
|||
|
||||
database = Info(video_ids, mail=user, password=password).info
|
||||
com = CommentStream(database, user, password, save_dir=options.destination)
|
||||
com.srt = options.no_srt
|
||||
com.start()
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue