Review notes (text before the "diff --git" line is ignored by git apply/patch):
- Line breaks, indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy so that every hunk matches its @@ line counts;
  confirm with `git apply --check` before applying.
- travel_date now zero-pads month and day unconditionally (the original
  `> 9` test left single-digit months unpadded), so the blob hash on the
  "index" line no longer describes the resulting file.

diff --git a/hyperdia.py b/hyperdia.py
index 971e8e5..4425abb 100755
--- a/hyperdia.py
+++ b/hyperdia.py
@@ -13,6 +13,7 @@ import more_itertools as mlt
 import pandas as pd
 import pytz
 import requests
+import simplejson as json
 from tabulate import tabulate
 
 HYPERDIA_CGI = "http://www.hyperdia.com/en/cgi/search/en/hyperdia2.cgi"
@@ -37,11 +38,11 @@ HYPERDIA_PARAMS = {
     "sum_target": "7",
     "facility": "reserved",
     "search_target": "route",
-    "sprexprs": "on",
-    "sprnozomi": "on",
-    "slpexprs": "on",
-    "jr": "on",
-    "privately": "on",
+    "sprexprs": "on",  # Shinkansen
+    "sprnozomi": "on",  # Shinkansen plus Nozomi/Mizuho
+    "slpexprs": "on",  # 特急 aka limited express
+    "jr": "on",  # JR lines
+    "privately": "on",  # Non-JR lines
     "search_way": ""
 }
 
@@ -86,6 +87,37 @@ class HyperdiaTrip:
     total_time: int
     total_cost: int
     transfers: int
+    result_number: Optional[int] = None
+    # Date in format ISO (YYYY-MM-DD)
+    travel_date: Optional[str] = None
+
+
+def _serialize(trip: HyperdiaTrip) -> dict:
+    """Convert a trip to a JSON-ready dict; step times become timestamps."""
+    structure = dict()
+    structure["steps"] = list()
+
+    for attrib in ("total_distance", "total_time", "total_cost", "transfers",
+                   "result_number", "travel_date"):
+        structure[attrib] = getattr(trip, attrib)
+
+    for step in trip.steps:
+
+        subdict = dict()
+
+        subdict["start_station"] = step.start_station
+        subdict["end_station"] = step.end_station
+        subdict["start_time"] = step.start_time.timestamp()
+        subdict["end_time"] = step.end_time.timestamp()
+        subdict["duration"] = step.duration
+        subdict["train_name"] = step.train_name
+        subdict["is_transfer"] = step.is_transfer
+        subdict["start_track_number"] = step.start_track_number
+        subdict["end_track_number"] = step.end_track_number
+
+        structure["steps"].append(subdict)
+
+    return structure
 
 
 def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
@@ -204,7 +236,10 @@ def parse_hyperdia_table(soup, year, month, day):
 
     data = list()
 
+    previous_is_direct = False  # NOTE(review): never read in this patch
+
     # Skip the heading and the row immediately afterwards (commuter pass)
+
     for group in mlt.windowed(soup.find_all("tr")[2:], n=3, step=2):
 
         # Groups of 3 elements:
@@ -218,9 +253,15 @@ def parse_hyperdia_table(soup, year, month, day):
         startdata = start_info.find_all("td")[0:3]
         traindata = journey_info.find_all("td")[2]
         enddata = end_info.find_all("td")[0:3]
+
         # Ignore "add to favorities"
         start_station_name = list(startdata[2].stripped_strings)[0]
 
+        direct_connection = enddata[1].next_element.get("src")
+
+        if direct_connection is not None and "icon_choku.gif" in direct_connection:
+            previous_is_direct = True
+
         # Second span in the station name column contains the track number
         # if applicable (if not, it's empty)
         start_track_number = parse_track_number(startdata[2])
@@ -229,12 +270,13 @@ def parse_hyperdia_table(soup, year, month, day):
         start_station_time = parse_station_time(startdata[0], year, month, day,
                                                 start=True)
         train_name = parse_train_name(traindata)
+
         end_station_name = list(enddata[2].stripped_strings)[0]
         end_station_time = parse_station_time(enddata[0], year, month, day,
                                               start=False)
 
         is_transfer = True if train_name == "Walk" else False
-        duration = ((end_station_time - start_station_time).seconds / 60) % 60
+        duration = ((end_station_time - start_station_time).seconds // 60)
 
         entry = HyperdiaStep(
             start_station=start_station_name,
@@ -264,7 +306,16 @@ def parse_hyperdia_html(soup, *args, **kwargs):
         parsed_heading = parse_hyperdia_heading(heading)
         parsed_table = parse_hyperdia_table(table, *args, **kwargs)
 
-        trip = HyperdiaTrip(steps=parsed_table, **parsed_heading)
+        # travel_date is documented as an ISO date (YYYY-MM-DD), so month
+        # and day must always be two digits. zfill(2) leaves "10" alone and
+        # turns "9" into "09"; str() also accepts values passed as ints.
+        month = str(kwargs["month"]).zfill(2)
+        day = str(kwargs["day"]).zfill(2)
+
+        travel_date = f'{kwargs["year"]}-{month}-{day}'
+
+        trip = HyperdiaTrip(steps=parsed_table, travel_date=travel_date,
+                            **parsed_heading)
         results.append(trip)
 
     return results
@@ -327,14 +378,21 @@ def hyperdia_search(start_station: str, end_station: str, hour: int,
     soup = BeautifulSoup(raw_result.text, "html.parser")
     results = parse_hyperdia_html(soup, year=year, month=month, day=day)
 
+    json_data = dict()
+    json_data["result"] = list()
+
     for index, trip in enumerate(results, start=1):
 
+        trip.result_number = index
+
         if output_type == "md":
             print(f"##### Route {index}", end="\n\n")
             print(trip_summary(trip))
         elif output_type == "json":
-            table = convert_trip_to_table(trip)
-            print(table.to_json(orient="records"))
+            json_data["result"].append(_serialize(trip))
+
+    if output_type == "json":
+        print(json.dumps(json_data, indent=2))
 
 
 def main():