This commit is contained in:
cube
2026-05-30 18:39:09 +01:00
parent 292de5d130
commit 57a6ebbf35
3 changed files with 131 additions and 1 deletions

View File

@@ -1,3 +1,35 @@
# pcs_live
scrapes procyclingstats livestats for use elsewhere
# setup (windows)
after cloning set up a virtualenv
```
py -3 -m venv .venv
.venv\Scripts\activate
```
if PowerShell refuses to run the activation script, you may need to run this first before activating the virtualenv
```
Set-ExecutionPolicy Unrestricted -Force
```
install dependencies
```
pip install beautifulsoup4
pip install requests
```
# usage
```
from pcslive import LiveStats
stats = LiveStats()
stats.print_races()
```

11
example.py Normal file
View File

@@ -0,0 +1,11 @@
"""Print a timeline update for the first race in the live-stats panel."""
from pcslive import LiveStats


def main():
    """Fetch the live races and print one timeline entry of the first race.

    Prints nothing when there are no live races or when the first race has
    no timeline entries.
    """
    stats = LiveStats()
    if not stats.races:
        return

    race = stats.races[0]
    race.get_timeline()
    # The timeline attribute may be missing (race without a URL) or empty,
    # so look it up defensively instead of indexing it blindly.
    timeline = getattr(race, "timeline", None)
    if not timeline:
        return

    print("Latest timeline update from", race.title, ":")
    print(timeline[0])


if __name__ == "__main__":
    main()

87
pcslive.py Normal file
View File

@@ -0,0 +1,87 @@
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
class LiveStats:
    """Scrape the live-stats panel of the procyclingstats.com home page.

    Creating an instance downloads the home page once and parses every
    ``<li>`` of the live panel into a ``Race`` object.

    Attributes:
        live: the parsed element with class ``hp3-livestats`` (with its
            ``inverse`` children removed), or ``None`` when the page has
            no such element.
        races: list of ``Race`` objects built from ``live``; empty when
            ``live`` is ``None``.
    """

    HOME_URL = "https://www.procyclingstats.com/"
    # requests waits forever by default; bound the wait (seconds).
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.refresh_live()
        self.get_races()

    def refresh_live(self):
        """Download the home page and store its live panel in ``self.live``.

        Raises:
            requests.HTTPError: if the server answers with an error status,
                so an error page is never mistaken for "no live races".
        """
        response = requests.get(self.HOME_URL, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # find() returns the first match or None, instead of indexing a
        # possibly empty find_all() result.
        panel = soup.find(attrs={"class": "hp3-livestats"})
        if panel is None:
            self.live = None
            return

        for tag in panel.find_all(attrs={"class": "inverse"}):
            tag.decompose()
        self.live = panel

    def get_races(self):
        """Rebuild ``self.races`` from the ``<li>`` items of ``self.live``."""
        if self.live is None:
            self.races = []
            return
        self.races = [Race(item) for item in self.live.find_all("li")]

    def print_races(self):
        """Print the stats of every race in ``self.races``."""
        for race in self.races:
            race.print_stats()
class Race:
    """One race entry (an ``<li>`` element) from the live-stats panel.

    Text fields are produced by ``remove_tags``, which turns a missing
    element into the literal string ``"None"``; that string is therefore
    used as the "not available" marker for ``togo`` and ``url``.

    Attributes:
        raw: the element this race was built from.
        title, status, togo, situation: text of the child elements with
            class ``title``, ``status``, ``togo`` and ``situ_txt``.
        url: ``href`` of the first link in ``raw``, or ``"None"``.
        timeline: list of text entries, set by ``get_timeline``.
    """

    BASE_URL = "https://www.procyclingstats.com/"
    # requests waits forever by default; bound the wait (seconds).
    REQUEST_TIMEOUT = 10

    def __init__(self, raw):
        self.raw = raw
        self.refresh_info()

    def refresh_info(self):
        """Re-read title, status, distance to go, situation and URL from ``raw``."""
        find = self.raw.find
        self.title = self.remove_tags(find(attrs={"class": "title"}))
        self.status = self.remove_tags(find(attrs={"class": "status"}))
        self.togo = self.remove_tags(find(attrs={"class": "togo"}))
        self.situation = self.remove_tags(find(attrs={"class": "situ_txt"}))

        # An entry without a link must yield the "None" marker rather than
        # fail on subscripting a missing element.
        link = find("a", href=True)
        self.url = self.remove_tags(link["href"] if link is not None else None)

    def print_stats(self):
        """Print title, status, distance to go (if known) and situation."""
        print(self.title)
        print(self.status)
        if self.togo != "None":
            print(self.togo, "to go")
        print(self.situation)
        print("===============")

    def print_raw(self):
        """Print the raw element followed by a blank line and the URL."""
        print(self.raw)
        print("")
        print(self.url)

    def get_timeline(self):
        """Download the race page and store its timeline in ``self.timeline``.

        ``self.timeline`` is always a list afterwards: it is empty when the
        race has no URL or the page has no ``timeline3cont`` element. Items
        without a ``stat`` element, or whose ``stat`` has no ``textCont``
        child, are skipped.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        if self.url == "None":
            self.timeline = []
            return

        # urljoin copes with relative, root-relative and absolute hrefs.
        full_url = urljoin(self.BASE_URL, self.url)
        response = requests.get(full_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        entries = []
        container = soup.find(attrs={"class": "timeline3cont"})
        if container is not None:
            for item in container.find_all("li"):
                stat = item.find(attrs={"class": "stat"})
                if stat is None:
                    continue
                content = stat.find(attrs={"class": "textCont"})
                if content is None:
                    # Would otherwise be stored as the string "None".
                    continue
                entries.append(self.remove_tags(content))
        self.timeline = entries

    def remove_tags(self, text):
        """Return the visible text of ``text`` joined by single spaces.

        ``text`` is converted with ``str()`` and re-parsed as HTML; the
        contents of ``<style>`` and ``<script>`` elements are dropped.
        Passing ``None`` therefore returns the string ``"None"``.
        """
        text_soup = BeautifulSoup(str(text), "html.parser")
        for data in text_soup(["style", "script"]):
            data.decompose()
        return " ".join(text_soup.stripped_strings)