Support exporting nico comments as SRT
This commit is contained in:
parent
e3d662a9fe
commit
e1bd6c145a
1 changed files with 45 additions and 13 deletions
|
@ -8,6 +8,7 @@ import os
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import pysrt
|
||||||
from nicotools.download import Comment, Info, utils
|
from nicotools.download import Comment, Info, utils
|
||||||
import simplejson as json
|
import simplejson as json
|
||||||
|
|
||||||
|
@ -35,20 +36,45 @@ def extract_ids(urls):
|
||||||
return nico_id
|
return nico_id
|
||||||
|
|
||||||
|
|
||||||
|
def build_srt(data, display_seconds=2):
    """Build a SubRip subtitle file from timed comments.

    Args:
        data: Iterable of ``(time, text)`` pairs, where ``time`` is a string
            in ``%M:%S.%f`` format (minutes:seconds.fraction).
        display_seconds: How long each comment stays on screen, in seconds.

    Returns:
        A ``pysrt.SubRipFile`` whose entries are ordered by start time and
        numbered from 1.

    Raises:
        ValueError: If a ``time`` string does not match ``%M:%S.%f``.
    """
    # Parse everything first so entries can be ordered chronologically.
    timed = [(datetime.strptime(stamp, "%M:%S.%f"), text)
             for stamp, text in data]
    # list.sort() is stable: comments sharing a timestamp keep their
    # incoming order.
    timed.sort(key=lambda pair: pair[0])

    srt_file = pysrt.SubRipFile()
    # SubRip counters conventionally start at 1, not 0.
    for index, (start, text) in enumerate(timed, start=1):
        # Integer division keeps the millisecond component an int.
        millis = start.microsecond // 1000
        subtime_st = pysrt.SubRipTime(minutes=start.minute,
                                      seconds=start.second,
                                      milliseconds=millis)
        subtime_end = pysrt.SubRipTime(minutes=start.minute,
                                       seconds=start.second + display_seconds,
                                       milliseconds=millis)
        entry = pysrt.SubRipItem(index, subtime_st, subtime_end, text=text)
        srt_file.append(entry)

    return srt_file
|
||||||
|
|
||||||
|
|
||||||
class CommentStream(Comment):
|
class CommentStream(Comment):
|
||||||
|
|
||||||
|
srt = False
|
||||||
|
|
||||||
def saver(self, video_id: str, is_xml: bool,
|
def saver(self, video_id: str, is_xml: bool,
|
||||||
coroutine: asyncio.Task) -> bool:
|
coroutine: asyncio.Task) -> bool:
|
||||||
|
|
||||||
|
if is_xml:
|
||||||
|
super().saver(video_id, is_xml, coroutine)
|
||||||
|
return True
|
||||||
|
|
||||||
comment_data = coroutine.result()
|
comment_data = coroutine.result()
|
||||||
conts = list()
|
conts = list()
|
||||||
|
|
||||||
if not is_xml:
|
|
||||||
data = json.loads(comment_data)
|
data = json.loads(comment_data)
|
||||||
contents = [process_element(item)
|
contents = [process_element(item)
|
||||||
for item in data if "chat" in item]
|
for item in data if "chat" in item]
|
||||||
|
|
||||||
file_path = utils.make_name(self.glossary[video_id], "",
|
file_path = utils.make_name(self.glossary[video_id], "",
|
||||||
extention="txt")
|
extention="txt")
|
||||||
|
file_srt = utils.make_name(self.glossary[video_id], "",
|
||||||
|
extention="srt")
|
||||||
|
|
||||||
df = pd.DataFrame.from_records(contents,
|
df = pd.DataFrame.from_records(contents,
|
||||||
columns=["Time", "Comment"])
|
columns=["Time", "Comment"])
|
||||||
|
@ -56,10 +82,13 @@ class CommentStream(Comment):
|
||||||
format="%M:%S.%f").dt.time
|
format="%M:%S.%f").dt.time
|
||||||
df = df.sort_values(by="Time")
|
df = df.sort_values(by="Time")
|
||||||
|
|
||||||
|
|
||||||
with file_path.open("w", encoding="utf-8") as f:
|
with file_path.open("w", encoding="utf-8") as f:
|
||||||
df.to_csv(f, sep="\t", index=False)
|
df.to_csv(f, sep="\t", index=False)
|
||||||
|
|
||||||
|
if self.srt:
|
||||||
|
srt_data = build_srt(contents)
|
||||||
|
srt_data.save(str(file_srt))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@ -69,6 +98,8 @@ def main():
|
||||||
parser.add_argument("video", help="Video URL(s)", nargs="+")
|
parser.add_argument("video", help="Video URL(s)", nargs="+")
|
||||||
parser.add_argument("-d", "--destination", help="Destination directory",
|
parser.add_argument("-d", "--destination", help="Destination directory",
|
||||||
default="./")
|
default="./")
|
||||||
|
parser.add_argument("--no-srt", action="store_false",
|
||||||
|
help="Don't generate SRT")
|
||||||
|
|
||||||
options = parser.parse_args()
|
options = parser.parse_args()
|
||||||
user, _, password = netrc.netrc().hosts["niconico"]
|
user, _, password = netrc.netrc().hosts["niconico"]
|
||||||
|
@ -76,6 +107,7 @@ def main():
|
||||||
|
|
||||||
database = Info(video_ids, mail=user, password=password).info
|
database = Info(video_ids, mail=user, password=password).info
|
||||||
com = CommentStream(database, user, password, save_dir=options.destination)
|
com = CommentStream(database, user, password, save_dir=options.destination)
|
||||||
|
com.srt = options.no_srt
|
||||||
com.start()
|
com.start()
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue