#!/usr/bin/python3

# SPDX-FileCopyrightText: 2021 Luca Beltrame <lbeltrame@kde.org>
#
# SPDX-License-Identifier: BSD-3-Clause

import argparse
import asyncio
import netrc
import os
from datetime import datetime, timedelta
from urllib.parse import urlparse

import pandas as pd
import pysrt
import simplejson as json
from nicotools.download import Comment, Info, utils


def process_element(element):
    """Convert one raw comment record into a ``(timestamp, text)`` pair.

    Args:
        element: Mapping with a ``"chat"`` entry that holds ``"vpos"`` (the
            comment position; it is multiplied by 10 and then treated as
            milliseconds, so presumably centiseconds -- confirm against the
            niconico API) and ``"content"`` (the comment text).

    Returns:
        A tuple ``(video_time, comment)`` where ``video_time`` is the
        position formatted with ``"%M:%S.%f"``. The hour component is not
        part of that format, so positions of one hour or more wrap around.

    Raises:
        KeyError: If ``"chat"``, ``"vpos"`` or ``"content"`` is missing.
    """
    chat = element["chat"]

    position_ms = chat["vpos"] * 10
    comment = chat["content"]
    # Offset from a fixed midnight rather than datetime.fromtimestamp():
    # the latter converts to local time, which shifts the minutes field in
    # time zones whose UTC offset is not a whole number of hours.
    moment = datetime(1970, 1, 1) + timedelta(milliseconds=position_ms)
    video_time = moment.strftime("%M:%S.%f")

    return (video_time, comment)


def extract_ids(urls):
    """Return the video ID found in the last path segment of each URL.

    Args:
        urls: Iterable of video URLs (a bare ID is returned unchanged,
            since it parses as a path with a single segment).

    Returns:
        A list with one ID per input URL, in input order. The list is
        empty when ``urls`` is empty.
    """
    video_ids = []

    for url in urls:
        # Query string and fragment are dropped by urlparse(); a trailing
        # slash is stripped so it does not yield an empty final segment.
        path = urlparse(url).path.rstrip("/")
        video_ids.append(path.rsplit("/", 1)[-1])

    # Return every collected ID, not just the one from the last iteration.
    return video_ids


def build_srt(data):
    """Build a ``pysrt.SubRipFile`` from ``(time, text)`` pairs.

    Args:
        data: Iterable of ``(time, text)`` tuples where ``time`` is a
            string in ``"%M:%S.%f"`` format.

    Returns:
        A ``pysrt.SubRipFile`` with one item per input pair, in input
        order. Each item starts at the given time and ends two seconds
        later.

    Raises:
        ValueError: If a time string does not match ``"%M:%S.%f"``.
    """
    srt_file = pysrt.SubRipFile()

    # SubRip subtitle counters conventionally start at 1, not 0.
    for index, (stamp, text) in enumerate(data, start=1):
        parsed = datetime.strptime(stamp, "%M:%S.%f")
        # Whole milliseconds; the sub-millisecond remainder is discarded.
        millis = parsed.microsecond // 1000

        subtime_st = pysrt.SubRipTime(minutes=parsed.minute,
                                      seconds=parsed.second,
                                      milliseconds=millis)
        # NOTE(review): seconds may reach 60 or 61 here; this relies on
        # SubRipTime normalising overflowing components -- confirm.
        subtime_end = pysrt.SubRipTime(minutes=parsed.minute,
                                       seconds=parsed.second + 2,
                                       milliseconds=millis)

        entry = pysrt.SubRipItem(index, subtime_st, subtime_end, text=text)
        srt_file.append(entry)

    return srt_file


class CommentStream(Comment):
    """``Comment`` subclass that saves JSON comments as a TSV table.

    XML comments are handed to the parent ``saver`` unchanged. JSON
    comments are written as a tab-separated ``Time``/``Comment`` table
    and, when ``srt`` is true, also as a SubRip file.
    """

    # When true, saver() also writes a SubRip (.srt) file.
    srt = False

    def saver(self, video_id: str, is_xml: bool,
              coroutine: asyncio.Task) -> bool:
        """Write the downloaded comments of one video to disk.

        Args:
            video_id: Key into ``self.glossary`` used to build the
                output file names.
            is_xml: When true, saving is delegated to the parent class.
            coroutine: Finished task whose ``result()`` is the JSON
                comment payload.

        Returns:
            Always ``True``.
        """
        if is_xml:
            super().saver(video_id, is_xml, coroutine)
            return True

        data = json.loads(coroutine.result())
        # Only records carrying a "chat" entry are comments.
        contents = [process_element(item)
                    for item in data if "chat" in item]

        # "extention" is spelled exactly as in the original call.
        file_path = utils.make_name(self.glossary[video_id], "",
                                    extention="txt")
        file_srt = utils.make_name(self.glossary[video_id], "",
                                   extention="srt")

        df = pd.DataFrame.from_records(contents,
                                       columns=["Time", "Comment"])
        df["Time"] = pd.to_datetime(df["Time"],
                                    format="%M:%S.%f").dt.time
        df = df.sort_values(by="Time")

        # newline="" lets the CSV writer control line endings itself, as
        # pandas recommends for text handles passed to to_csv().
        # NOTE(review): assumes make_name() returns a pathlib.Path.
        with file_path.open("w", encoding="utf-8", newline="") as f:
            df.to_csv(f, sep="\t", index=False)

        if self.srt:
            # Built from `contents` in payload order, not the sorted frame.
            srt_data = build_srt(contents)
            srt_data.save(str(file_srt))

        return True


def main():
    """Parse the command line and download comments for the given videos.

    Credentials are read from the ``niconico`` entry of the user's netrc
    file. If that file cannot be read or has no such entry, the program
    exits through ``parser.error`` instead of showing a traceback.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("video", help="Video URL(s)", nargs="+")
    parser.add_argument("-d", "--destination", help="Destination directory",
                        default="./")
    # Stored as `srt`: true by default, false once --no-srt is given.
    parser.add_argument("--no-srt", action="store_false", dest="srt",
                        help="Don't generate SRT")

    options = parser.parse_args()

    try:
        credentials = netrc.netrc().hosts.get("niconico")
    except (FileNotFoundError, netrc.NetrcParseError) as exc:
        parser.error(f"cannot read netrc credentials: {exc}")
    if credentials is None:
        parser.error('no "niconico" entry found in the netrc file')
    user, _, password = credentials

    video_ids = extract_ids(options.video)

    database = Info(video_ids, mail=user, password=password).info
    com = CommentStream(database, user, password, save_dir=options.destination)
    com.srt = options.srt
    com.start()


# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()