1
0
Fork 0

Make the JSON output a little saner

This commit is contained in:
Luca Beltrame 2020-02-15 14:25:43 +01:00
parent 5f01bf54a2
commit d502b8b79c
Signed by: einar
GPG key ID: 8DF631FD021DB0C5

View file

@ -13,6 +13,7 @@ import more_itertools as mlt
import pandas as pd
import pytz
import requests
import simplejson as json
from tabulate import tabulate
HYPERDIA_CGI = "http://www.hyperdia.com/en/cgi/search/en/hyperdia2.cgi"
@ -37,11 +38,11 @@ HYPERDIA_PARAMS = {
"sum_target": "7",
"facility": "reserved",
"search_target": "route",
"sprexprs": "on",
"sprnozomi": "on",
"slpexprs": "on",
"jr": "on",
"privately": "on",
"sprexprs": "on", # Shinkansen
"sprnozomi": "on", # Shinkansen plus Nozomi/Mizuho
"slpexprs": "on", # 特急 aka limited express
"jr": "on", # JR lines
"privately": "on", # Non-JR lines
"search_way": ""
}
@ -86,6 +87,37 @@ class HyperdiaTrip:
total_time: int
total_cost: int
transfers: int
result_number: Optional[int] = None
# Date in ISO format (YYYY-MM-DD)
travel_date: Optional[str] = None
def _serialize(trip: HyperdiaTrip) -> dict:
    """Convert a trip into a plain dictionary suitable for JSON output.

    The returned mapping holds a ``"steps"`` list (one dictionary per entry
    of ``trip.steps``) followed by the trip-level summary attributes.

    Args:
        trip: The trip to convert. Only attribute access is used, so any
            object exposing the same attributes works.

    Returns:
        A dictionary whose first key is ``"steps"``, followed by
        ``total_distance``, ``total_time``, ``total_cost``, ``transfers``,
        ``result_number`` and ``travel_date``.
    """
    trip_attributes = ("total_distance", "total_time", "total_cost",
                       "transfers", "result_number", "travel_date")

    steps = [
        {
            "start_station": step.start_station,
            "end_station": step.end_station,
            # Times are emitted as the value of .timestamp() rather than the
            # object itself (presumably datetime instances, which JSON
            # encoders cannot serialize directly).
            "start_time": step.start_time.timestamp(),
            "end_time": step.end_time.timestamp(),
            "duration": step.duration,
            "train_name": step.train_name,
            "is_transfer": step.is_transfer,
            "start_track_number": step.start_track_number,
            "end_track_number": step.end_track_number,
        }
        for step in trip.steps
    ]

    # "steps" is inserted first so the key order of the output is stable.
    structure = {"steps": steps}
    for attrib in trip_attributes:
        structure[attrib] = getattr(trip, attrib)

    return structure
def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
@ -204,7 +236,10 @@ def parse_hyperdia_table(soup, year, month, day):
data = list()
previous_is_direct = False
# Skip the heading and the row immediately afterwards (commuter pass)
for group in mlt.windowed(soup.find_all("tr")[2:], n=3, step=2):
# Groups of 3 elements:
@ -218,9 +253,15 @@ def parse_hyperdia_table(soup, year, month, day):
startdata = start_info.find_all("td")[0:3]
traindata = journey_info.find_all("td")[2]
enddata = end_info.find_all("td")[0:3]
# Ignore "add to favorites"
start_station_name = list(startdata[2].stripped_strings)[0]
direct_connection = enddata[1].next_element.get("src")
if direct_connection is not None and "icon_choku.gif" in direct_connection:
previous_is_direct = True
# Second span in the station name column contains the track number
# if applicable (if not, it's empty)
start_track_number = parse_track_number(startdata[2])
@ -229,12 +270,13 @@ def parse_hyperdia_table(soup, year, month, day):
start_station_time = parse_station_time(startdata[0], year, month, day,
start=True)
train_name = parse_train_name(traindata)
end_station_name = list(enddata[2].stripped_strings)[0]
end_station_time = parse_station_time(enddata[0], year, month, day,
start=False)
is_transfer = True if train_name == "Walk" else False
duration = ((end_station_time - start_station_time).seconds / 60) % 60
duration = ((end_station_time - start_station_time).seconds // 60)
entry = HyperdiaStep(
start_station=start_station_name,
@ -264,7 +306,16 @@ def parse_hyperdia_html(soup, *args, **kwargs):
parsed_heading = parse_hyperdia_heading(heading)
parsed_table = parse_hyperdia_table(table, *args, **kwargs)
trip = HyperdiaTrip(steps=parsed_table, **parsed_heading)
if int(kwargs["month"]) > 9:
# Add "0" in front of single-digit months
month = str(kwargs["month"]).zfill(2)
else:
month = kwargs["month"]
travel_date = f'{kwargs["year"]}-{month}-{kwargs["day"]}'
trip = HyperdiaTrip(steps=parsed_table, travel_date=travel_date,
**parsed_heading)
results.append(trip)
return results
@ -327,14 +378,21 @@ def hyperdia_search(start_station: str, end_station: str, hour: int,
soup = BeautifulSoup(raw_result.text, "html.parser")
results = parse_hyperdia_html(soup, year=year, month=month, day=day)
json_data = dict()
json_data["result"] = list()
for index, trip in enumerate(results, start=1):
trip.result_number = index
if output_type == "md":
print(f"##### Route {index}", end="\n\n")
print(trip_summary(trip))
elif output_type == "json":
table = convert_trip_to_table(trip)
print(table.to_json(orient="records"))
json_data["result"].append(_serialize(trip))
if output_type == "json":
print(json.dumps(json_data, indent=2))
def main():