some updates from today's racing and functions added for easier timeline use
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -174,3 +174,4 @@ cython_debug/
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
test.py
|
||||
|
||||
19
README.md
19
README.md
@@ -2,11 +2,9 @@
|
||||
|
||||
scrapes procyclingstats livestats homepage and timeline
|
||||
|
||||
timeline updates are simply the titles, so sometimes you get something like "ranking after x km" or "present riders today from last years top 20" with no details. for now i am not fetching the details shown underneath each title, because for the most part they can be ignored. the action timeline headings like "wheel change for x rider" are good enough when they come through.
|
||||
|
||||
# todo
|
||||
|
||||
- [ ] timeline items with details
|
||||
- [x] timeline items with details
|
||||
- [ ] pypi package ??
|
||||
|
||||
# setup (windows)
|
||||
@@ -31,11 +29,14 @@ pip install beautifulsoup4
|
||||
pip install requests
|
||||
```
|
||||
|
||||
# usage
|
||||
# examples
|
||||
|
||||
```
|
||||
from pcslive import LiveStats
|
||||
in any looping example, use a suitable delay to avoid spamming the site with requests
|
||||
|
||||
stats = LiveStats()
|
||||
stats.print_races()
|
||||
```
|
||||
`example_latest_timeline.py` : uses a simple infinite loop (async would be better in a full application) to skim the top of the timeline for live updates. it will not display the same update twice
|
||||
|
||||
`example_live_timeline.py` : an older way to show live timeline updates; it is less compatible with async routines because it works by checking the last item
|
||||
|
||||
`example_situation.py` : display the provided time gaps (if any) in a readable format
|
||||
|
||||
`example_timeline_all.py` : display the entire timeline of a race (at this moment) - in reverse so that the latest update is at the bottom for console convenience
|
||||
11
example_latest_timeline.py
Normal file
11
example_latest_timeline.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from pcslive import LiveStats
|
||||
import time
|
||||
|
||||
# Example: poll the timeline of one live race and print each new update once.
stats = LiveStats()
race = stats.find_race("giro")

# find_race returns None when no race title contains the query, so stop
# here with a readable message instead of failing inside the loop below.
if race is None:
    raise SystemExit('no live race matching "giro" was found')

while True:
    # re-scrape the race page, then ask only for an update not yet shown
    race.get_timeline()
    latest = race.timeline_latest()
    if latest:
        print(latest)
    # delay between polls to avoid spamming the site with requests
    time.sleep(10)
|
||||
85
pcslive.py
85
pcslive.py
@@ -42,6 +42,13 @@ class LiveStats:
|
||||
for race in self.races:
|
||||
race.print_stats()
|
||||
|
||||
# finds a race by its title
|
||||
def find_race(self, query):
    """Return the first race whose title contains ``query``.

    The comparison is a case-insensitive substring test against each
    race's ``title``, made in the order the races appear in
    ``self.races``. Returns ``None`` when no title matches.
    """
    matching = (
        candidate
        for candidate in self.races
        if query.lower() in candidate.title.lower()
    )
    return next(matching, None)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -62,6 +69,10 @@ class Race:
|
||||
self.timeline = []
|
||||
self.situation_long = []
|
||||
|
||||
# when using timeline_latest() put already returned updates
|
||||
# inside here so that they dont get repeated
|
||||
self.timeline_latest_store = []
|
||||
|
||||
# to parse the raw data given by LiveStats
|
||||
def refresh_info(self):
|
||||
title_r = self.raw.find(attrs={"class":"title"})
|
||||
@@ -100,8 +111,8 @@ class Race:
|
||||
if self.url != "None":
|
||||
full_url = "https://www.procyclingstats.com/" + self.url
|
||||
|
||||
req = requests.get(full_url)
|
||||
html = req.text
|
||||
self.req = requests.get(full_url)
|
||||
html = self.req.text
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
return soup
|
||||
return None
|
||||
@@ -112,15 +123,77 @@ class Race:
|
||||
page = self.get_race_page()
|
||||
if page:
|
||||
all = page.find_all(attrs={"class":"timeline3cont"})
|
||||
live = all[0]
|
||||
timeline = live.find_all("li")
|
||||
self.timeline_live = all[0]
|
||||
timeline = self.timeline_live.find_all("li")
|
||||
self.timeline = []
|
||||
for item in timeline:
|
||||
stat = item.find(attrs={"class":"stat"})
|
||||
if stat:
|
||||
stat_content = stat.find(attrs={"class":"textCont"})
|
||||
self.timeline.append(self.remove_tags(stat_content))
|
||||
stat_content, is_data, has_info_number = self.timeline_stats(stat)
|
||||
if is_data:
|
||||
pass
|
||||
elif has_info_number:
|
||||
# same as in the timeline_latest function
|
||||
number = self.remove_tags(has_info_number)
|
||||
text = self.remove_tags(stat_content)
|
||||
update = number + " " + text
|
||||
self.timeline.append(update)
|
||||
else:
|
||||
self.timeline.append(self.remove_tags(stat_content))
|
||||
|
||||
# a function for getting only the latest timeline updates!
|
||||
# useful for making an async timeline feed
|
||||
def timeline_latest(self):
    """Return the newest timeline update not returned before, else None.

    On the first call (nothing stored in ``self.timeline_latest_store``)
    the first entry of the already-parsed ``self.timeline`` is returned so
    that a feed shows something straight away. On later calls only the
    first ``li`` of ``self.timeline_live`` is inspected.

    Every returned update is appended to ``self.timeline_latest_store`` so
    it is never returned twice. ``None`` is returned when there is nothing
    to report: an empty timeline, no timeline fetched yet, a chart/data
    item, or an update that was already returned.
    """
    seen = self.timeline_latest_store

    # its a bit weird if nothing is displayed on the first run, so if the
    # seen list is empty just show the latest update from the full timeline
    if not seen:
        if not self.timeline:
            # nothing parsed yet (or the race has no updates)
            return None
        update = self.timeline[0]
        seen.append(update)
        return update

    # timeline_live is only assigned once get_timeline() has fetched a page
    live = getattr(self, "timeline_live", None)
    if live is None:
        return None

    # find() gives the first li, which is the newest item on the page
    latest = live.find("li")
    if latest is None:
        return None

    stat = latest.find(attrs={"class": "stat"})
    if not stat:
        return None

    stat_content, is_data, has_info_number = self.timeline_stats(stat)
    if is_data:
        # chart items are skipped, same as in the full timeline
        return None

    if has_info_number:
        # some updates use a big number, like "150 kilometers to the
        # finish", where the text alone is only "kilometers to the finish",
        # so put the number in front. this happens for every big number.
        number = self.remove_tags(has_info_number)
        text = self.remove_tags(stat_content)
        update = number + " " + text
    else:
        update = self.remove_tags(stat_content)

    # only show the update if it hasnt been seen before
    if update in seen:
        return None
    seen.append(update)
    return update
|
||||
|
||||
# function for getting specific things from the timeline
|
||||
# it seemed like a good idea to put it here at the time
|
||||
# shrug
|
||||
def timeline_stats(self, stat):
    """Look up the three sub-elements of a timeline ``stat`` element.

    Returns a tuple ``(stat_content, is_data, has_info_number)`` holding
    the results of ``stat.find`` for the classes "textCont", "chartCont"
    and "number", looked up in that order.
    """
    wanted_classes = ("textCont", "chartCont", "number")
    stat_content, is_data, has_info_number = (
        stat.find(attrs={"class": css_class}) for css_class in wanted_classes
    )
    return stat_content, is_data, has_info_number
|
||||
|
||||
# creates a dictionary containing time gaps as keys and each
|
||||
# timegap points to a list of riders in that group
|
||||
|
||||
Reference in New Issue
Block a user