Basic working implementation
Only the command line interface is missing.
This commit is contained in:
parent
464528f698
commit
05bf4b46ca
1 changed files with 127 additions and 52 deletions
179
hyperdia.py
179
hyperdia.py
|
@ -3,16 +3,20 @@
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from itertools import zip_longest
|
||||
from typing import NamedTuple, Optional
|
||||
import re
|
||||
from typing import NamedTuple, Optional, List
|
||||
from urllib.parse import urlparse, urlencode, urlunparse
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import more_itertools as mlt
|
||||
import pandas as pd
|
||||
import pytz
|
||||
import requests
|
||||
from tabulate import tabulate
|
||||
|
||||
HYPERDIA_CGI = "http://www.hyperdia.com/en/cgi/search/en/hyperdia2.cgi"
|
||||
HYPERDIA_SEARCH = "http://www.hyperdia.com/en/cgi/en/search.html"
|
||||
GROUP_MATCHER = re.compile(r".*No\.(?P<tracknum>[0-9]{1,}).*")
|
||||
|
||||
HYPERDIA_PARAMS = {
|
||||
"dep_node": "",
|
||||
|
@ -58,22 +62,22 @@ class HyperdiaStep:
|
|||
duration: Optional[str] = None
|
||||
train_name: Optional[str] = None
|
||||
is_transfer: Optional[bool] = False
|
||||
start_track_number: Optional[int] = None
|
||||
end_track_number: Optional[int] = None
|
||||
|
||||
|
||||
def pairwise(iterable):
    """Pair up consecutive items of *iterable* without overlap.

    s -> (s0, s1), (s2, s3), (s4, s5), ...

    Both positions of every pair are fed by the same underlying iterator,
    so each item is consumed exactly once and a trailing item without a
    partner is dropped. Note that this differs from ``itertools.pairwise``,
    which yields overlapping pairs.

    Returns:
        A lazy ``zip`` iterator of 2-tuples.
    """
    # Two references to ONE iterator: zip alternates between them, which
    # is what produces the non-overlapping pairs.
    twin_slots = [iter(iterable)] * 2
    return zip(*twin_slots)
|
||||
@dataclass
|
||||
class HyperdiaTrip:
|
||||
|
||||
|
||||
def grouped(iterable, n):
    """Split *iterable* into consecutive, non-overlapping tuples of size *n*.

    s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1),
    (s2n,s2n+1,s2n+2,...s3n-1), ...

    Every slot of each tuple is drawn from one shared iterator, so items
    are consumed once and a final incomplete group is discarded.

    Returns:
        A lazy ``zip`` iterator of n-tuples.
    """
    shared = iter(iterable)
    # Hand zip the very same iterator n times; it pulls one item per slot.
    return zip(*(shared for _ in range(n)))
|
||||
steps: List[HyperdiaStep]
|
||||
total_distance: int
|
||||
total_time: int
|
||||
total_cost: int
|
||||
transfers: int
|
||||
|
||||
|
||||
def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
||||
month="08", year="2020", via=None):
|
||||
month="08", year="2020", max_route=5, via=None):
|
||||
|
||||
session = requests.Session()
|
||||
post_params = HYPERDIA_PARAMS.copy()
|
||||
|
@ -86,6 +90,7 @@ def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
|||
post_params["month"] = month
|
||||
post_params["hour"] = hour
|
||||
post_params["minute"] = minute
|
||||
post_params["max_route"] = max_route
|
||||
|
||||
if via is None:
|
||||
for element in ("via_node01", "via_node02", "via_node03"):
|
||||
|
@ -98,7 +103,7 @@ def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
|||
for node, station in zip_longest(
|
||||
via,
|
||||
("via_node01", "via_node02", "via_node03"),
|
||||
fill_value=""):
|
||||
fill_value=""):
|
||||
|
||||
post_params[node] = station
|
||||
|
||||
|
@ -113,34 +118,24 @@ def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
|||
|
||||
return result
|
||||
|
||||
#TODO: Adjust this, use the Firefox inspector
|
||||
# For now, keep this in mind:
|
||||
# Odd rows per result: stations
|
||||
# Even rows: Train names, transfers...
|
||||
|
||||
def parse_hyperdia_heading(soup):
|
||||
|
||||
def parse_hyperdia_heading(soup, fare_number=1):
|
||||
# Heading (div class="title_r") with this structure:
|
||||
# First span: total time in minutes
|
||||
# Second span: number of transfers
|
||||
# Third span: total distance in Km
|
||||
# Fourth span: total cost in JPY
|
||||
|
||||
data = dict()
|
||||
elements = soup.select("span")[0:4]
|
||||
|
||||
mapping = {1: "total_time", 2: "transfer_num", 3: "total_distance"}
|
||||
total_time, transfers, distance, cost = [item.text.strip()
|
||||
for item in elements]
|
||||
|
||||
counter = 1
|
||||
cost = int(cost.replace(",", ""))
|
||||
|
||||
for element in soup.find_all("span", class_="text_blue"):
|
||||
|
||||
if counter > 3:
|
||||
break
|
||||
|
||||
data[mapping[counter]] = element.text
|
||||
counter += 1
|
||||
|
||||
fare = soup.find("span", {"class": "text_blue",
|
||||
"id": f"fare_total{fare_number}"})
|
||||
fare = int(fare.text.replace(",", ""))
|
||||
data["total_fare"] = fare
|
||||
|
||||
return data
|
||||
return {"total_time": total_time, "transfers": transfers,
|
||||
"total_distance": distance, "total_cost": cost}
|
||||
|
||||
|
||||
def parse_station_time(element, year, month, day, start=True):
|
||||
|
@ -150,8 +145,12 @@ def parse_station_time(element, year, month, day, start=True):
|
|||
# Otherwise we get the only item
|
||||
|
||||
current_time = times[-1] if start else times[0]
|
||||
station_time = datetime(year, month, day, int(current_time.split(":")[0]),
|
||||
int(current_time.split(":")[1]),
|
||||
|
||||
hour, minutes = current_time.split(":")
|
||||
|
||||
station_time = datetime(year, int(month), int(day),
|
||||
int(hour),
|
||||
int(minutes),
|
||||
tzinfo=pytz.timezone("Japan"))
|
||||
|
||||
return station_time
|
||||
|
@ -165,6 +164,20 @@ def parse_train_name(element):
|
|||
return list(selected_item.stripped_strings)[0]
|
||||
|
||||
|
||||
def parse_track_number(element):
    """Extract the track number from a station-name cell.

    The second ``<span>`` inside the station name column carries the track
    label when one applies; otherwise it is empty. The label is matched
    against the module-level ``GROUP_MATCHER`` pattern, which captures the
    digits following ``No.``.

    Args:
        element: Object exposing ``select("span")`` whose results have a
            ``text`` attribute (presumably a BeautifulSoup tag; confirm
            against the caller).

    Returns:
        The track number as an ``int``, or ``None`` when the cell has no
        second span, the span is empty, or its text holds no
        ``No.<digits>`` label.
    """
    spans = element.select("span")

    # Without a second span there is no track label to read at all.
    if len(spans) < 2:
        return None

    track_data = spans[1].text
    if not track_data:
        return None

    # search() returns None for text that lacks "No.<digits>" (for example
    # whitespace-only content); treat that as "no track" instead of
    # failing on a None subscript.
    match = GROUP_MATCHER.search(track_data)
    if match is None:
        return None

    return int(match["tracknum"])
|
||||
|
||||
|
||||
def parse_hyperdia_table(soup, year, month, day):
|
||||
|
||||
data = list()
|
||||
|
@ -186,6 +199,12 @@ def parse_hyperdia_table(soup, year, month, day):
|
|||
enddata = end_info.find_all("td")[0:3]
|
||||
# Ignore "add to favorites"
|
||||
start_station_name = list(startdata[2].stripped_strings)[0]
|
||||
|
||||
# Second span in the station name column contains the track number
|
||||
# if applicable (if not, it's empty)
|
||||
start_track_number = parse_track_number(startdata[2])
|
||||
end_track_number = parse_track_number(enddata[2])
|
||||
|
||||
start_station_time = parse_station_time(startdata[0], year, month, day,
|
||||
start=True)
|
||||
train_name = parse_train_name(traindata)
|
||||
|
@ -203,31 +222,87 @@ def parse_hyperdia_table(soup, year, month, day):
|
|||
end_time=end_station_time,
|
||||
train_name=train_name,
|
||||
is_transfer=is_transfer,
|
||||
duration=duration)
|
||||
duration=duration,
|
||||
start_track_number=start_track_number,
|
||||
end_track_number=end_track_number)
|
||||
|
||||
data.append(entry)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def parse_hyperdia_html(soup):
|
||||
def parse_hyperdia_html(soup, *args, **kwargs):
|
||||
|
||||
tables = soup.find_all("table", {"class": "table"})
|
||||
titles = soup.find_all("div", {"class": "title2"})
|
||||
headings = soup.find_all("div", {"class": "title_r"})
|
||||
|
||||
results = list()
|
||||
for data in tables:
|
||||
properties = {}
|
||||
extracted = data.find_all(
|
||||
"span", {"class": ["text_16",
|
||||
"text_blue_l", "text_blue_p"]})
|
||||
parsed = list(pairwise(extracted))
|
||||
start = parsed[0]
|
||||
end = parsed[-1]
|
||||
|
||||
properties["start"] = start[1].text
|
||||
properties["starttime"] = start[0].text
|
||||
properties["end"] = end[1].text
|
||||
properties["endtime"] = end[0].text.strip()
|
||||
results.append(properties)
|
||||
for heading, table in zip(headings, tables):
|
||||
|
||||
parsed_heading = parse_hyperdia_heading(heading)
|
||||
parsed_table = parse_hyperdia_table(table, *args, **kwargs)
|
||||
|
||||
trip = HyperdiaTrip(steps=parsed_table, **parsed_heading)
|
||||
results.append(trip)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def convert_trip_to_table(trip: HyperdiaTrip) -> pd.DataFrame:
    """Render the steps of a trip as a display-ready table.

    Args:
        trip: Trip whose ``steps`` are iterated. Each step must expose
            ``start_station``, ``start_time``, ``start_track_number``,
            ``end_station``, ``end_time``, ``end_track_number``,
            ``duration`` and ``train_name``. The times must support the
            ``%H:%M`` format spec and a non-``None`` duration must support
            ``.0f`` formatting.

    Returns:
        A DataFrame with one row per step. Times are formatted as
        ``HH:MM``; missing track numbers, missing durations and any other
        missing cell are shown as ``"-"``.
    """
    columns = ["From", "Departure time", "Departure track",
               "To", "Arrival time", "Arrival track", "Duration",
               "Train / Transfer"]

    def _track(number):
        # Compare with None explicitly: 0 is falsy but is still a real
        # track number and must not be displayed as missing.
        return "-" if number is None else f"{number:.0f}"

    def _duration(minutes):
        # The step field is optional; show a placeholder rather than
        # failing when no duration was parsed.
        return "-" if minutes is None else f"{minutes:.0f} minutes"

    rows = [
        (
            step.start_station,
            # No space after the colon: it would become part of the
            # strftime pattern and pad the cell with a leading blank.
            f"{step.start_time:%H:%M}",
            _track(step.start_track_number),
            step.end_station,
            f"{step.end_time:%H:%M}",
            _track(step.end_track_number),
            _duration(step.duration),
            step.train_name,
        )
        for step in trip.steps
    ]

    df = pd.DataFrame.from_records(rows, columns=columns)
    # Any remaining missing value (e.g. an absent train name) gets the
    # same placeholder as the other empty cells.
    return df.fillna("-")
|
||||
|
||||
|
||||
def trip_summary(trip: HyperdiaTrip) -> str:
    """Build the printable report for a single trip.

    The report is the step table produced by ``convert_trip_to_table``,
    rendered by ``tabulate`` in its ``github`` format without the index
    column, followed by a one-line recap of the trip totals.

    Args:
        trip: Trip providing ``total_time``, ``total_distance`` and
            ``total_cost`` in addition to the steps used for the table.

    Returns:
        The rendered table, a blank line, the totals line and a trailing
        blank line, as one string.
    """
    rendered_steps = tabulate(
        convert_trip_to_table(trip),
        tablefmt="github",
        headers="keys",
        showindex=False,
    )

    totals = "".join([
        f"Total time: {trip.total_time} minutes,",
        f" Total distance: {trip.total_distance},",
        f" Total cost {trip.total_cost} JPY",
    ])

    return f"{rendered_steps}\n\n{totals}\n\n"
|
||||
|
||||
|
||||
def hyperdia_search(start_station: str, end_station: str, hour: int,
                    minute: int, day: str = "15", month: str = "08",
                    year: int = 2020, max_route: int = 5,
                    via: Optional[List[str]] = None) -> List[str]:
    """Search for routes and print a summary of every trip found.

    The raw response is obtained through ``get_hyperdia_data``, parsed
    with BeautifulSoup's ``html.parser`` and turned into trips by
    ``parse_hyperdia_html``; each trip is then rendered with
    ``trip_summary``.

    Args:
        start_station: Departure station, forwarded to the request.
        end_station: Arrival station, forwarded to the request.
        hour: Hour of the search time.
        minute: Minute of the search time.
        day: Day of the month (the default is the string ``"15"``).
        month: Month (the default is the string ``"08"``).
        year: Year, also forwarded to the HTML parser.
        max_route: Forwarded to the request as the ``max_route`` value.
        via: Optional intermediate stations; ``None`` means no stopovers.

    Returns:
        The summaries that were printed, one string per trip, in result
        order. (Previously nothing was returned despite the declared
        ``List[str]`` return type.)
    """
    raw_result = get_hyperdia_data(start_station, end_station,
                                   hour, minute, day, month, year, max_route,
                                   via)
    soup = BeautifulSoup(raw_result.text, "html.parser")
    trips = parse_hyperdia_html(soup, year=year, month=month, day=day)

    summaries = []
    for trip in trips:
        summary = trip_summary(trip)
        # Print as each summary becomes available so output still appears
        # incrementally, exactly as before.
        print(summary)
        summaries.append(summary)

    return summaries
|
||||
|
|
Loading…
Add table
Reference in a new issue