Basic working implementation
Only the command line interface is missing.
This commit is contained in:
parent
464528f698
commit
05bf4b46ca
1 changed files with 127 additions and 52 deletions
177
hyperdia.py
177
hyperdia.py
|
@ -3,16 +3,20 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from itertools import zip_longest
|
from itertools import zip_longest
|
||||||
from typing import NamedTuple, Optional
|
import re
|
||||||
|
from typing import NamedTuple, Optional, List
|
||||||
from urllib.parse import urlparse, urlencode, urlunparse
|
from urllib.parse import urlparse, urlencode, urlunparse
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import more_itertools as mlt
|
import more_itertools as mlt
|
||||||
|
import pandas as pd
|
||||||
import pytz
|
import pytz
|
||||||
import requests
|
import requests
|
||||||
|
from tabulate import tabulate
|
||||||
|
|
||||||
HYPERDIA_CGI = "http://www.hyperdia.com/en/cgi/search/en/hyperdia2.cgi"
|
HYPERDIA_CGI = "http://www.hyperdia.com/en/cgi/search/en/hyperdia2.cgi"
|
||||||
HYPERDIA_SEARCH = "http://www.hyperdia.com/en/cgi/en/search.html"
|
HYPERDIA_SEARCH = "http://www.hyperdia.com/en/cgi/en/search.html"
|
||||||
|
GROUP_MATCHER = re.compile(r".*No\.(?P<tracknum>[0-9]{1,}).*")
|
||||||
|
|
||||||
HYPERDIA_PARAMS = {
|
HYPERDIA_PARAMS = {
|
||||||
"dep_node": "",
|
"dep_node": "",
|
||||||
|
@ -58,22 +62,22 @@ class HyperdiaStep:
|
||||||
duration: Optional[str] = None
|
duration: Optional[str] = None
|
||||||
train_name: Optional[str] = None
|
train_name: Optional[str] = None
|
||||||
is_transfer: Optional[bool] = False
|
is_transfer: Optional[bool] = False
|
||||||
|
start_track_number: Optional[int] = None
|
||||||
|
end_track_number: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class HyperdiaTrip:
    """One complete route suggestion: its legs plus the heading totals."""

    # Legs of the route, in travel order.
    steps: List[HyperdiaStep]
    # Totals taken from the route heading.
    # NOTE(review): only the cost is known to arrive as an int; the other
    # three look like they are passed as stripped heading text -- confirm
    # whether a conversion to int is intended.
    total_distance: int
    total_time: int
    total_cost: int
    transfers: int
|
||||||
|
|
||||||
|
|
||||||
def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
||||||
month="08", year="2020", via=None):
|
month="08", year="2020", max_route=5, via=None):
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
post_params = HYPERDIA_PARAMS.copy()
|
post_params = HYPERDIA_PARAMS.copy()
|
||||||
|
@ -86,6 +90,7 @@ def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
||||||
post_params["month"] = month
|
post_params["month"] = month
|
||||||
post_params["hour"] = hour
|
post_params["hour"] = hour
|
||||||
post_params["minute"] = minute
|
post_params["minute"] = minute
|
||||||
|
post_params["max_route"] = max_route
|
||||||
|
|
||||||
if via is None:
|
if via is None:
|
||||||
for element in ("via_node01", "via_node02", "via_node03"):
|
for element in ("via_node01", "via_node02", "via_node03"):
|
||||||
|
@ -113,34 +118,24 @@ def get_hyperdia_data(start_station, end_station, hour, minute, day="15",
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
#TODO: Adjust this, use the Firefox inspector
|
|
||||||
# For now, keep this in mind:
|
|
||||||
# Odd rows per result: stations
|
|
||||||
# Even rows: Train names, transfers...
|
|
||||||
|
|
||||||
|
def parse_hyperdia_heading(soup):
    """Extract the summary figures from one route heading.

    The heading (``div class="title_r"``) is expected to start with four
    ``span`` elements, in this order:

    1. total time in minutes
    2. number of transfers
    3. total distance in km
    4. total cost in JPY

    Args:
        soup: The heading element; any object exposing a BeautifulSoup-style
            ``select`` method.

    Returns:
        A dict with the keys ``total_time``, ``transfers`` and
        ``total_distance`` (stripped span text) and ``total_cost`` (``int``,
        thousands separators removed).

    Raises:
        ValueError: If the heading holds fewer than four spans, or the cost
            text is not a number.
    """
    spans = soup.select("span")[:4]

    # Fail with a message that names the problem instead of the generic
    # tuple-unpacking error a short heading would otherwise trigger.
    if len(spans) < 4:
        raise ValueError(
            f"route heading has {len(spans)} span element(s); expected at "
            "least 4 (time, transfers, distance, cost)"
        )

    total_time, transfers, distance, cost = (span.text.strip()
                                             for span in spans)

    return {
        "total_time": total_time,
        "transfers": transfers,
        "total_distance": distance,
        # The cost is rendered with thousands separators, e.g. "13,620".
        "total_cost": int(cost.replace(",", "")),
    }
|
|
||||||
|
|
||||||
|
|
||||||
def parse_station_time(element, year, month, day, start=True):
|
def parse_station_time(element, year, month, day, start=True):
|
||||||
|
@ -150,8 +145,12 @@ def parse_station_time(element, year, month, day, start=True):
|
||||||
# Otherwise we get the only item
|
# Otherwise we get the only item
|
||||||
|
|
||||||
current_time = times[-1] if start else times[0]
|
current_time = times[-1] if start else times[0]
|
||||||
station_time = datetime(year, month, day, int(current_time.split(":")[0]),
|
|
||||||
int(current_time.split(":")[1]),
|
hour, minutes = current_time.split(":")
|
||||||
|
|
||||||
|
station_time = datetime(year, int(month), int(day),
|
||||||
|
int(hour),
|
||||||
|
int(minutes),
|
||||||
tzinfo=pytz.timezone("Japan"))
|
tzinfo=pytz.timezone("Japan"))
|
||||||
|
|
||||||
return station_time
|
return station_time
|
||||||
|
@ -165,6 +164,20 @@ def parse_train_name(element):
|
||||||
return list(selected_item.stripped_strings)[0]
|
return list(selected_item.stripped_strings)[0]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_track_number(element):
    """Return the track number shown in a station-name cell, if any.

    The second ``span`` of the station name column contains the track
    number when applicable; otherwise it is empty.

    Args:
        element: The station-name cell; any object exposing a
            BeautifulSoup-style ``select`` method.

    Returns:
        The track number as an ``int``, or ``None`` when the cell has no
        second span, the span is blank, or its text does not match
        ``GROUP_MATCHER``.
    """
    spans = element.select("span")

    # Some cells may not carry the track span at all.
    if len(spans) < 2:
        return None

    # Strip first so that whitespace-only text counts as "no track".
    track_data = spans[1].text.strip()
    if not track_data:
        return None

    match = GROUP_MATCHER.search(track_data)
    if match is None:
        # Text is present but is not of the "No.<digits>" form.
        return None

    return int(match["tracknum"])
|
||||||
|
|
||||||
|
|
||||||
def parse_hyperdia_table(soup, year, month, day):
|
def parse_hyperdia_table(soup, year, month, day):
|
||||||
|
|
||||||
data = list()
|
data = list()
|
||||||
|
@ -186,6 +199,12 @@ def parse_hyperdia_table(soup, year, month, day):
|
||||||
enddata = end_info.find_all("td")[0:3]
|
enddata = end_info.find_all("td")[0:3]
|
||||||
# Ignore "add to favorites"
|
# Ignore "add to favorites"
|
||||||
start_station_name = list(startdata[2].stripped_strings)[0]
|
start_station_name = list(startdata[2].stripped_strings)[0]
|
||||||
|
|
||||||
|
# Second span in the station name column contains the track number
|
||||||
|
# if applicable (if not, it's empty)
|
||||||
|
start_track_number = parse_track_number(startdata[2])
|
||||||
|
end_track_number = parse_track_number(enddata[2])
|
||||||
|
|
||||||
start_station_time = parse_station_time(startdata[0], year, month, day,
|
start_station_time = parse_station_time(startdata[0], year, month, day,
|
||||||
start=True)
|
start=True)
|
||||||
train_name = parse_train_name(traindata)
|
train_name = parse_train_name(traindata)
|
||||||
|
@ -203,31 +222,87 @@ def parse_hyperdia_table(soup, year, month, day):
|
||||||
end_time=end_station_time,
|
end_time=end_station_time,
|
||||||
train_name=train_name,
|
train_name=train_name,
|
||||||
is_transfer=is_transfer,
|
is_transfer=is_transfer,
|
||||||
duration=duration)
|
duration=duration,
|
||||||
|
start_track_number=start_track_number,
|
||||||
|
end_track_number=end_track_number)
|
||||||
|
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def parse_hyperdia_html(soup, *args, **kwargs):
    """Build one HyperdiaTrip per route found in a parsed results page.

    Route headings (``div class="title_r"``) and route tables
    (``table class="table"``) are paired up in document order; pairing stops
    at the shorter of the two sequences.

    Args:
        soup: The parsed results page.
        *args: Forwarded unchanged to ``parse_hyperdia_table``.
        **kwargs: Forwarded unchanged to ``parse_hyperdia_table``.

    Returns:
        A list of ``HyperdiaTrip`` objects, one per heading/table pair.
    """
    route_tables = soup.find_all("table", {"class": "table"})
    route_headings = soup.find_all("div", {"class": "title_r"})

    def build_trip(heading, table):
        # Heading first, then the table: same call order as before.
        summary = parse_hyperdia_heading(heading)
        legs = parse_hyperdia_table(table, *args, **kwargs)
        return HyperdiaTrip(steps=legs, **summary)

    return [build_trip(heading, table)
            for heading, table in zip(route_headings, route_tables)]
|
||||||
|
|
||||||
|
|
||||||
|
def convert_trip_to_table(trip: "HyperdiaTrip") -> pd.DataFrame:
    """Turn the steps of a trip into a display-ready table.

    Args:
        trip: The trip whose ``steps`` are tabulated. Each step must provide
            ``start_station``, ``start_time``, ``start_track_number``,
            ``end_station``, ``end_time``, ``end_track_number``,
            ``duration`` and ``train_name``.

    Returns:
        A DataFrame with one row per step and the columns "From",
        "Departure time", "Departure track", "To", "Arrival time",
        "Arrival track", "Duration" and "Train / Transfer". Missing values
        are shown as "-".
    """
    columns = ["From", "Departure time", "Departure track",
               "To", "Arrival time", "Arrival track", "Duration",
               "Train / Transfer"]

    def track_label(number):
        # Compare against None explicitly: 0 is a legitimate track number
        # and must not be rendered as "missing".
        return "-" if number is None else f"{number:.0f}"

    def duration_label(duration):
        # The duration defaults to None on a step and may also arrive as
        # numeric text, so normalise through float() before formatting.
        if duration is None:
            return "-"
        return f"{float(duration):.0f} minutes"

    rows = [
        (step.start_station,
         # No leading space in the format spec, so cells hold "09:05"
         # rather than " 09:05".
         f"{step.start_time:%H:%M}",
         track_label(step.start_track_number),
         step.end_station,
         f"{step.end_time:%H:%M}",
         track_label(step.end_track_number),
         duration_label(step.duration),
         step.train_name)
        for step in trip.steps
    ]

    table = pd.DataFrame.from_records(rows, columns=columns)

    # Remaining gaps (e.g. a step without a train name) become "-".
    return table.fillna("-")
|
||||||
|
|
||||||
|
|
||||||
|
def trip_summary(trip: HyperdiaTrip) -> str:
    """Render a trip as a GitHub-style text table followed by its totals.

    Args:
        trip: The trip to describe.

    Returns:
        The step table, a blank line, a one-line summary of total time,
        distance and cost, and a trailing blank line.
    """
    rendered_steps = tabulate(convert_trip_to_table(trip),
                              tablefmt="github",
                              headers="keys",
                              showindex=False)

    totals = "".join((
        f"Total time: {trip.total_time} minutes,",
        f" Total distance: {trip.total_distance},",
        f" Total cost {trip.total_cost} JPY",
    ))

    return f"{rendered_steps}\n\n{totals}\n\n"
|
||||||
|
|
||||||
|
|
||||||
|
def hyperdia_search(start_station: str, end_station: str, hour: int,
                    minute: int, day: str = "15", month: str = "08",
                    year: int = 2020, max_route: int = 5,
                    via: Optional[List[str]] = None) -> List[str]:
    """Query Hyperdia for routes and print a summary of each one.

    Args:
        start_station: Name of the departure station.
        end_station: Name of the arrival station.
        hour: Hour of the requested time.
        minute: Minute of the requested time.
        day: Day of the month of the requested date.
        month: Month of the requested date.
        year: Year of the requested date.
        max_route: Maximum number of routes to request.
        via: Optional intermediate stations.

    Returns:
        The text summaries that were printed, one per route found, in the
        order they appear on the results page.
    """
    raw_result = get_hyperdia_data(start_station, end_station, hour, minute,
                                   day=day, month=month, year=year,
                                   max_route=max_route, via=via)

    soup = BeautifulSoup(raw_result.text, "html.parser")
    trips = parse_hyperdia_html(soup, year=year, month=month, day=day)

    summaries = []
    for trip in trips:
        summary = trip_summary(trip)
        # Print as we go so earlier routes still show if a later one fails.
        print(summary)
        summaries.append(summary)

    # The signature promises List[str]; previously nothing was returned.
    return summaries
|
||||||
|
|
Loading…
Add table
Reference in a new issue