From 937b07a61d9075324c1dc94bde92f67adb74169f Mon Sep 17 00:00:00 2001 From: cube Date: Mon, 1 Jun 2026 16:56:43 +0100 Subject: [PATCH] some updates from todays racing and functions added for easier timeline use --- .gitignore | 1 + README.md | 19 +++++---- example_latest_timeline.py | 11 +++++ pcslive.py | 85 +++++++++++++++++++++++++++++++++++--- 4 files changed, 101 insertions(+), 15 deletions(-) create mode 100644 example_latest_timeline.py diff --git a/.gitignore b/.gitignore index 36b13f1..d7e2384 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,4 @@ cython_debug/ # PyPI configuration file .pypirc +test.py diff --git a/README.md b/README.md index b853f9b..65cc27d 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,9 @@ scrapes procyclingstats livestats homepage and timeline -timeline updates are simply the titles, so sometimes you get something like "ranking after x km" or "present riders today from last years top 20" with no details. for now i am not getting the details from underneath, because for the most part they can be ignored. the action timeline headings like "wheel change for x rider" are good enough when they come through. - # todo -- [ ] timeline items with details +- [x] timeline items with details - [ ] pypi package ?? # setup (windows) @@ -31,11 +29,14 @@ pip install beautifulsoup4 pip install requests ``` -# usage +# examples -``` -from pcslive import LiveStats +in any looping example, use a suitable delay to avoid spamming the site with requests -stats = LiveStats() -stats.print_races() -``` +`example_latest_timeline.py` : uses a simple infinite loop (async would be better in a full application) to skim the top of the timeline for live updates. 
# finds a race by its title
def find_race(self, query):
    """Return the first race whose title contains ``query``, ignoring case.

    Walks ``self.races`` in order and compares the lower-cased query
    against each race's lower-cased ``title``.

    Returns the matching race object, or ``None`` when no title contains
    the query (including when ``self.races`` is empty).
    """
    # the generator is lazy, so the scan stops at the first match
    matches = (
        candidate
        for candidate in self.races
        if query.lower() in candidate.title.lower()
    )
    return next(matches, None)
# a function for getting only the latest timeline updates!
# useful for making an async timeline feed
def timeline_latest(self):
    """Return the newest timeline update, or ``None`` if there is nothing new.

    Call ``get_timeline()`` first: it fills ``self.timeline`` and stores the
    live timeline container in ``self.timeline_live``, both of which are read
    here. Every update handed back is remembered in
    ``self.timeline_latest_store`` so the same text is never returned twice.

    Returns:
        The update text (prefixed with the entry's big number when it has
        one), or ``None`` when the newest entry was already returned, is a
        chart-only entry, or no timeline data is available yet.
    """
    seen = self.timeline_latest_store

    # first run: showing nothing at all would look odd, so hand back the
    # newest entry of the full timeline and mark it as seen
    if not seen:
        if not self.timeline:
            # nothing scraped yet (or the race has no updates) - the
            # original indexed [0] here and crashed with IndexError
            return None
        update = self.timeline[0]
        seen.append(update)
        return update

    # timeline_live only exists once get_timeline() has found a race page
    live = getattr(self, "timeline_live", None)
    if live is None:
        return None

    # find() returns only the first <li>, which is the newest entry
    latest = live.find("li")
    if latest is None:
        return None

    stat = latest.find(attrs={"class": "stat"})
    if not stat:
        return None

    stat_content, is_data, has_info_number = self.timeline_stats(stat)
    if is_data:
        # chart entries carry no headline text worth showing
        return None

    if has_info_number:
        # some updates use a big number, like "150 kilometers to the
        # finish", where the text part alone is just "kilometers to the
        # finish" - so put the number in front of the text
        number = self.remove_tags(has_info_number)
        update = number + " " + self.remove_tags(stat_content)
    else:
        update = self.remove_tags(stat_content)

    # only show the update if it hasnt been seen before
    if update in seen:
        return None
    seen.append(update)
    return update


# function for getting specific things from the timeline
def timeline_stats(self, stat):
    """Look up the three parts of a timeline ``stat`` element.

    Returns a tuple ``(stat_content, is_data, has_info_number)`` holding the
    results of ``stat.find`` for the classes ``textCont``, ``chartCont`` and
    ``number`` respectively; each item is whatever ``find`` returned for that
    class.
    """
    wanted = ("textCont", "chartCont", "number")
    stat_content, is_data, has_info_number = (
        stat.find(attrs={"class": css_class}) for css_class in wanted
    )
    return stat_content, is_data, has_info_number