1
0
Fork 0

Make the JSON output a little saner

This commit is contained in:
Luca Beltrame 2020-02-15 14:25:43 +01:00
parent 5f01bf54a2
commit d502b8b79c
Signed by: einar
GPG key ID: 8DF631FD021DB0C5

View file

@ -13,6 +13,7 @@ import more_itertools as mlt
import pandas as pd import pandas as pd
import pytz import pytz
import requests import requests
import simplejson as json
from tabulate import tabulate from tabulate import tabulate
HYPERDIA_CGI = "http://www.hyperdia.com/en/cgi/search/en/hyperdia2.cgi" HYPERDIA_CGI = "http://www.hyperdia.com/en/cgi/search/en/hyperdia2.cgi"
@ -37,11 +38,11 @@ HYPERDIA_PARAMS = {
"sum_target": "7", "sum_target": "7",
"facility": "reserved", "facility": "reserved",
"search_target": "route", "search_target": "route",
"sprexprs": "on", "sprexprs": "on", # Shinkansen
"sprnozomi": "on", "sprnozomi": "on", # Shinkansen plus Nozomi/Mizuho
"slpexprs": "on", "slpexprs": "on", # 特急 aka limited express
"jr": "on", "jr": "on", # JR lines
"privately": "on", "privately": "on", # Non-JR lines
"search_way": "" "search_way": ""
} }
@ -86,6 +87,37 @@ class HyperdiaTrip:
total_time: int total_time: int
total_cost: int total_cost: int
transfers: int transfers: int
result_number: Optional[int] = None
# Date in format ISO (YYYY-MM-DD)
travel_date: Optional[str] = None
def _serialize(trip: HyperdiaTrip) -> dict:
    """Flatten a trip into plain built-in containers for JSON encoding.

    The returned mapping starts with a ``"steps"`` key, followed by the
    trip-level summary attributes copied verbatim from ``trip``.
    ``"steps"`` holds one dictionary per element of ``trip.steps``: the
    start and end times of each step are converted through their
    ``timestamp()`` method, every other step attribute is copied as-is.
    """
    summary_fields = (
        "total_distance",
        "total_time",
        "total_cost",
        "transfers",
        "result_number",
        "travel_date",
    )

    # "steps" is inserted first so that it remains the leading key.
    payload = {"steps": []}
    for name in summary_fields:
        payload[name] = getattr(trip, name)

    payload["steps"].extend(
        {
            "start_station": leg.start_station,
            "end_station": leg.end_station,
            "start_time": leg.start_time.timestamp(),
            "end_time": leg.end_time.timestamp(),
            "duration": leg.duration,
            "train_name": leg.train_name,
            "is_transfer": leg.is_transfer,
            "start_track_number": leg.start_track_number,
            "end_track_number": leg.end_track_number,
        }
        for leg in trip.steps
    )
    return payload
def get_hyperdia_data(start_station, end_station, hour, minute, day="15", def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
@ -204,7 +236,10 @@ def parse_hyperdia_table(soup, year, month, day):
data = list() data = list()
previous_is_direct = False
# Skip the heading and the row immediately afterwards (commuter pass) # Skip the heading and the row immediately afterwards (commuter pass)
for group in mlt.windowed(soup.find_all("tr")[2:], n=3, step=2): for group in mlt.windowed(soup.find_all("tr")[2:], n=3, step=2):
# Groups of 3 elements: # Groups of 3 elements:
@ -218,9 +253,15 @@ def parse_hyperdia_table(soup, year, month, day):
startdata = start_info.find_all("td")[0:3] startdata = start_info.find_all("td")[0:3]
traindata = journey_info.find_all("td")[2] traindata = journey_info.find_all("td")[2]
enddata = end_info.find_all("td")[0:3] enddata = end_info.find_all("td")[0:3]
# Ignore "add to favorites" # Ignore "add to favorites"
start_station_name = list(startdata[2].stripped_strings)[0] start_station_name = list(startdata[2].stripped_strings)[0]
direct_connection = enddata[1].next_element.get("src")
if direct_connection is not None and "icon_choku.gif" in direct_connection:
previous_is_direct = True
# Second span in the station name column contains the track number # Second span in the station name column contains the track number
# if applicable (if not, it's empty) # if applicable (if not, it's empty)
start_track_number = parse_track_number(startdata[2]) start_track_number = parse_track_number(startdata[2])
@ -229,12 +270,13 @@ def parse_hyperdia_table(soup, year, month, day):
start_station_time = parse_station_time(startdata[0], year, month, day, start_station_time = parse_station_time(startdata[0], year, month, day,
start=True) start=True)
train_name = parse_train_name(traindata) train_name = parse_train_name(traindata)
end_station_name = list(enddata[2].stripped_strings)[0] end_station_name = list(enddata[2].stripped_strings)[0]
end_station_time = parse_station_time(enddata[0], year, month, day, end_station_time = parse_station_time(enddata[0], year, month, day,
start=False) start=False)
is_transfer = True if train_name == "Walk" else False is_transfer = True if train_name == "Walk" else False
duration = ((end_station_time - start_station_time).seconds / 60) % 60 duration = ((end_station_time - start_station_time).seconds // 60)
entry = HyperdiaStep( entry = HyperdiaStep(
start_station=start_station_name, start_station=start_station_name,
@ -264,7 +306,16 @@ def parse_hyperdia_html(soup, *args, **kwargs):
parsed_heading = parse_hyperdia_heading(heading) parsed_heading = parse_hyperdia_heading(heading)
parsed_table = parse_hyperdia_table(table, *args, **kwargs) parsed_table = parse_hyperdia_table(table, *args, **kwargs)
trip = HyperdiaTrip(steps=parsed_table, **parsed_heading) if int(kwargs["month"]) > 9:
# Add "0" in front of single-digit months
month = str(kwargs["month"]).zfill(2)
else:
month = kwargs["month"]
travel_date = f'{kwargs["year"]}-{month}-{kwargs["day"]}'
trip = HyperdiaTrip(steps=parsed_table, travel_date=travel_date,
**parsed_heading)
results.append(trip) results.append(trip)
return results return results
@ -327,14 +378,21 @@ def hyperdia_search(start_station: str, end_station: str, hour: int,
soup = BeautifulSoup(raw_result.text, "html.parser") soup = BeautifulSoup(raw_result.text, "html.parser")
results = parse_hyperdia_html(soup, year=year, month=month, day=day) results = parse_hyperdia_html(soup, year=year, month=month, day=day)
json_data = dict()
json_data["result"] = list()
for index, trip in enumerate(results, start=1): for index, trip in enumerate(results, start=1):
trip.result_number = index
if output_type == "md": if output_type == "md":
print(f"##### Route {index}", end="\n\n") print(f"##### Route {index}", end="\n\n")
print(trip_summary(trip)) print(trip_summary(trip))
elif output_type == "json": elif output_type == "json":
table = convert_trip_to_table(trip) json_data["result"].append(_serialize(trip))
print(table.to_json(orient="records"))
if output_type == "json":
print(json.dumps(json_data, indent=2))
def main(): def main():