comments
This commit is contained in:
@@ -42,6 +42,3 @@ stats.print_races()
|
|||||||
`example.py` posts the latest timeline of one current race (checking every 10 seconds for updates)
|
`example.py` posts the latest timeline of one current race (checking every 10 seconds for updates)
|
||||||
|
|
||||||
`example2.py` organises the situation diagram into a dictionary that maps each time gap to the list of riders in that group. It could probably be organised better, but it works.
|
`example2.py` organises the situation diagram into a dictionary that maps each time gap to the list of riders in that group. It could probably be organised better, but it works.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
10
example.py
10
example.py
@@ -3,15 +3,21 @@ import time
|
|||||||
|
|
||||||
stats = LiveStats()
|
stats = LiveStats()
|
||||||
|
|
||||||
|
# if there are live races...
|
||||||
if len(stats.races) > 0:
|
if len(stats.races) > 0:
|
||||||
race = stats.races[0]
|
race = stats.races[0] # just grab the first one for example's sake
|
||||||
print("Latest timeline update from", race.title, ":")
|
print("Latest timeline update from", race.title, ":")
|
||||||
|
|
||||||
|
# keep the last update so we don't spam the terminal
|
||||||
last_update = ""
|
last_update = ""
|
||||||
while True:
|
while True:
|
||||||
race.get_timeline()
|
race.get_timeline()
|
||||||
|
|
||||||
|
# only update terminal if the newest item has changed (is new)
|
||||||
if last_update != race.timeline[0]:
|
if last_update != race.timeline[0]:
|
||||||
print(race.timeline[0])
|
print(race.timeline[0])
|
||||||
last_update = race.timeline[0]
|
last_update = race.timeline[0]
|
||||||
|
|
||||||
|
# wait ten seconds before checking again
|
||||||
|
# maybe set to longer if running for extended time
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
@@ -3,10 +3,12 @@ import time
|
|||||||
|
|
||||||
stats = LiveStats()
|
stats = LiveStats()
|
||||||
|
|
||||||
|
# if there are live races...
|
||||||
if len(stats.races) > 0:
|
if len(stats.races) > 0:
|
||||||
race = stats.races[0]
|
race = stats.races[0]
|
||||||
|
|
||||||
race.get_situation_long()
|
race.get_situation_long()
|
||||||
|
|
||||||
|
# what's the situation? :)
|
||||||
for x in race.situation_long:
|
for x in race.situation_long:
|
||||||
print(x, race.situation_long[x])
|
print(x, race.situation_long[x])
|
||||||
109
pcslive.py
109
pcslive.py
@@ -1,42 +1,65 @@
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import requests, time
|
import requests, time
|
||||||
|
|
||||||
|
# class for storing the live races
|
||||||
class LiveStats:
|
class LiveStats:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.refresh_live()
|
self.refresh_races()
|
||||||
self.get_races()
|
|
||||||
|
|
||||||
self.timeline = []
|
# scrapes from the PCS homepage where the little green live stats boxes are
|
||||||
self.situation_long = []
|
# it also gets rid of all the horrible polygon stuff
|
||||||
|
def refresh_races(self):
|
||||||
def refresh_live(self):
|
# the entire front page of PCS loaded into soup
|
||||||
req = requests.get("https://www.procyclingstats.com/")
|
req = requests.get("https://www.procyclingstats.com/")
|
||||||
html = req.text
|
html = req.text
|
||||||
soup = BeautifulSoup(html, "html.parser")
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
|
||||||
all = soup.find_all(attrs={"class":"hp3-livestats"})
|
# narrow it down to the little green live stats boxes showing us what
|
||||||
live = all[0]
|
# races are currently live
|
||||||
|
live = soup.find(attrs={"class":"hp3-livestats"})
|
||||||
|
|
||||||
|
# also get rid of the horrible polygon number stuff !!! YUCK
|
||||||
|
# i imagine it's how the profiles are drawn. there is A LOT
|
||||||
|
# and it makes reading the raw html very PAINFUL
|
||||||
for tag in live.find_all(attrs={"class":"inverse"}):
|
for tag in live.find_all(attrs={"class":"inverse"}):
|
||||||
tag.decompose()
|
tag.decompose()
|
||||||
|
|
||||||
self.live = live
|
# conveniently, every race is a list item (<li>)
|
||||||
|
# inside is the title, status (live/finished), km to go,
|
||||||
def get_races(self):
|
# and brief situation text, but we parse that inside Race
|
||||||
races_raw = self.live.find_all("li")
|
races = live.find_all("li")
|
||||||
self.races = []
|
self.races = []
|
||||||
for race in races_raw:
|
for raw_race in races:
|
||||||
this_race = Race(race)
|
# for now just create the Race object and add it to our list
|
||||||
self.races.append(this_race)
|
race = Race(raw_race)
|
||||||
|
self.races.append(race)
|
||||||
|
|
||||||
|
# straightforward
|
||||||
def print_races(self):
|
def print_races(self):
|
||||||
for race in self.races:
|
for race in self.races:
|
||||||
race.print_stats()
|
race.print_stats()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# class for storing information about individual live races
|
||||||
class Race:
|
class Race:
|
||||||
def __init__(self, raw):
|
def __init__(self, raw):
|
||||||
|
# this is the unparsed html from LiveStats
|
||||||
self.raw = raw
|
self.raw = raw
|
||||||
|
|
||||||
|
# now go parse it
|
||||||
self.refresh_info()
|
self.refresh_info()
|
||||||
|
|
||||||
|
# keeping these empty unless requested because i feel like it
|
||||||
|
# timeline is headings from the race's page (left side)
|
||||||
|
# situation_long is groups from the left side and time gaps
|
||||||
|
# (not to be confused with situation which is a brief homepage summary)
|
||||||
|
self.timeline = []
|
||||||
|
self.situation_long = []
|
||||||
|
|
||||||
|
# to parse the raw data given by LiveStats
|
||||||
def refresh_info(self):
|
def refresh_info(self):
|
||||||
title_r = self.raw.find(attrs={"class":"title"})
|
title_r = self.raw.find(attrs={"class":"title"})
|
||||||
status_r = self.raw.find(attrs={"class":"status"})
|
status_r = self.raw.find(attrs={"class":"status"})
|
||||||
@@ -44,25 +67,32 @@ class Race:
|
|||||||
situation_r = self.raw.find(attrs={"class":"situ_txt"})
|
situation_r = self.raw.find(attrs={"class":"situ_txt"})
|
||||||
url_r = self.raw.find("a", href=True)
|
url_r = self.raw.find("a", href=True)
|
||||||
|
|
||||||
self.title = self.remove_tags(title_r)
|
self.title = self.remove_tags(title_r) # name of the race
|
||||||
self.status = self.remove_tags(status_r)
|
self.status = self.remove_tags(status_r) # live or finished (finished races stay online for a short while after finishing)
|
||||||
self.togo = self.remove_tags(togo_r)
|
self.togo = self.remove_tags(togo_r) # how many kms to go (None if finished)
|
||||||
self.situation = self.remove_tags(situation_r)
|
self.situation = self.remove_tags(situation_r) # a brief summary of the situation - None if finished, and - if complete peloton
|
||||||
self.url = self.remove_tags(url_r["href"])
|
self.url = self.remove_tags(url_r["href"]) # the url for the race's full live stats page
|
||||||
|
|
||||||
|
# self explanatory
|
||||||
def print_stats(self):
|
def print_stats(self):
|
||||||
print(self.title)
|
print(self.title)
|
||||||
print(self.status)
|
print(self.status)
|
||||||
|
|
||||||
|
# don't bother showing kms to go or situation if finished
|
||||||
if self.togo != "None":
|
if self.togo != "None":
|
||||||
print(self.togo, "to go")
|
print(self.togo, "to go")
|
||||||
print(self.situation)
|
print(self.situation)
|
||||||
|
|
||||||
print("===============")
|
print("===============")
|
||||||
|
|
||||||
def print_raw(self):
|
# for testing
|
||||||
print(self.raw)
|
# def print_raw(self):
|
||||||
print("")
|
# print(self.raw)
|
||||||
print(self.url)
|
# print("")
|
||||||
|
# print(self.url)
|
||||||
|
|
||||||
|
# this gets the html of the entire page of stats for this particular race
|
||||||
|
# for use with get_timeline() and get_situation_long()
|
||||||
def get_race_page(self):
|
def get_race_page(self):
|
||||||
if self.url != "None":
|
if self.url != "None":
|
||||||
full_url = "https://www.procyclingstats.com/" + self.url
|
full_url = "https://www.procyclingstats.com/" + self.url
|
||||||
@@ -73,6 +103,8 @@ class Race:
|
|||||||
return soup
|
return soup
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# fills a list with all displayed timeline items (headings only)
|
||||||
|
# item 0 is the most recent update
|
||||||
def get_timeline(self):
|
def get_timeline(self):
|
||||||
page = self.get_race_page()
|
page = self.get_race_page()
|
||||||
if page:
|
if page:
|
||||||
@@ -86,31 +118,42 @@ class Race:
|
|||||||
stat_content = stat.find(attrs={"class":"textCont"})
|
stat_content = stat.find(attrs={"class":"textCont"})
|
||||||
self.timeline.append(self.remove_tags(stat_content))
|
self.timeline.append(self.remove_tags(stat_content))
|
||||||
|
|
||||||
|
|
||||||
|
# creates a dictionary containing time gaps as keys and each
|
||||||
|
# timegap points to a list of riders in that group
|
||||||
def get_situation_long(self):
|
def get_situation_long(self):
|
||||||
page = self.get_race_page()
|
page = self.get_race_page()
|
||||||
if page:
|
if page:
|
||||||
all = page.find_all(attrs={"class":"situCont"})
|
all = page.find_all(attrs={"class":"situCont"})
|
||||||
live = all[0]
|
live = all[0]
|
||||||
situation_long = live.find_all("li")
|
situation_long = live.find_all("li")
|
||||||
|
|
||||||
|
# create the dictionary
|
||||||
self.situation_long = {}
|
self.situation_long = {}
|
||||||
|
|
||||||
|
# last_timegap is used for grouping riders together
|
||||||
last_timegap = None
|
last_timegap = None
|
||||||
|
|
||||||
for item in situation_long:
|
for item in situation_long:
|
||||||
#print(item)
|
|
||||||
time_gap = item.find(attrs={"class":"time"})
|
time_gap = item.find(attrs={"class":"time"})
|
||||||
group_name = item.find(attrs={"class":"groupname"})
|
|
||||||
rider = item.find(attrs={"class":"maxw200"})
|
rider = item.find(attrs={"class":"maxw200"})
|
||||||
|
|
||||||
|
# time gap is only listed once and subsequent riders in that
|
||||||
|
# group don't have one (None)
|
||||||
if time_gap:
|
if time_gap:
|
||||||
tg = self.remove_tags(time_gap)
|
tg = self.remove_tags(time_gap) # get rid of html tags
|
||||||
self.situation_long[tg] = []
|
self.situation_long[tg] = [] # create the list inside the dict
|
||||||
last_timegap = tg
|
last_timegap = tg # set last timegap for the loop
|
||||||
rider_name = self.remove_tags(rider)
|
|
||||||
|
rider_name = self.remove_tags(rider) # remove tags from rider name
|
||||||
|
# the leading rider is basically the group name i guess?
|
||||||
|
# anyway we don't need it twice
|
||||||
if rider_name not in self.situation_long[last_timegap]:
|
if rider_name not in self.situation_long[last_timegap]:
|
||||||
|
# add rider to list of riders under that timegap/group
|
||||||
self.situation_long[last_timegap].append(rider_name)
|
self.situation_long[last_timegap].append(rider_name)
|
||||||
|
|
||||||
#print(self.remove_tags(time_gap), self.remove_tags(group_name), self.remove_tags(riders))
|
# remove surrounding html tags from final data points,
|
||||||
|
# like rider names, race names, etc.
|
||||||
|
|
||||||
def remove_tags(self, text):
|
def remove_tags(self, text):
|
||||||
text = str(text)
|
text = str(text)
|
||||||
text_soup = BeautifulSoup(text, "html.parser")
|
text_soup = BeautifulSoup(text, "html.parser")
|
||||||
|
|||||||
Reference in New Issue
Block a user