ayaya

2026-05-30 18:39:09 +01:00
parent 292de5d130
commit 57a6ebbf35
3 changed files with 131 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -1,3 +1,35 @@
 # pcs_live
-scrapes procyclingstats livestats for use elsewhere
+scrapes procyclingstats livestats for use elsewhere
 # setup (windows)
 after cloning, set up a virtualenv
 ```
 py -3 -m venv .venv
 .venv\Scripts\activate
 ```
 you may need to run this in PowerShell before activating the virtualenv
 ```
 Set-ExecutionPolicy Unrestricted -Force
 ```
 install dependencies
 ```
 pip install beautifulsoup4
 pip install requests
 ```
 # usage
 ```
 from pcslive import LiveStats
 stats = LiveStats()
 stats.print_races()
 ```
--- a/example.py
+++ b/example.py
@@ -0,0 +1,11 @@
 # Example: print the most recent timeline entry of the first live race.
 from pcslive import LiveStats

 stats = LiveStats()
 if stats.races:
    race = stats.races[0]
    race.get_timeline()
    # get_timeline() can leave the timeline unset (race without a link) or
    # empty (no entries on the page), so never index it blindly.
    timeline = getattr(race, "timeline", None)
    if timeline:
        print("Latest timeline update from", race.title, ":")
        print(timeline[0])
    else:
        print("No timeline updates available for", race.title)
--- a/pcslive.py
+++ b/pcslive.py
@@ -0,0 +1,87 @@
 from bs4 import BeautifulSoup
 import requests, time
 class LiveStats:
    """Scrape the live-races panel of the procyclingstats home page.

    The constructor fills two attributes:

    * ``live``  - the parsed element with class ``hp3-livestats``, or ``None``
      when the downloaded page does not contain one.
    * ``races`` - a list of ``Race`` objects, one per ``<li>`` inside ``live``
      (empty when ``live`` is ``None``).
    """

    # Seconds to wait for the server; without a timeout requests may block
    # indefinitely on an unresponsive connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.refresh_live()
        self.get_races()

    def refresh_live(self):
        """Download the home page and store its live-stats element in ``self.live``.

        Elements with class ``inverse`` inside the live-stats element are
        removed before it is stored.
        """
        response = requests.get(
            "https://www.procyclingstats.com/", timeout=self.REQUEST_TIMEOUT
        )
        soup = BeautifulSoup(response.text, "html.parser")
        # find() yields the first match or None, so a page without the panel
        # no longer raises IndexError.
        live = soup.find(attrs={"class": "hp3-livestats"})
        if live is not None:
            for tag in live.find_all(attrs={"class": "inverse"}):
                tag.decompose()
        self.live = live

    def get_races(self):
        """Rebuild ``self.races`` from the ``<li>`` items of ``self.live``."""
        if self.live is None:
            self.races = []
            return
        self.races = [Race(item) for item in self.live.find_all("li")]

    def print_races(self):
        """Print the stats of every race in ``self.races``."""
        for race in self.races:
            race.print_stats()
 class Race:
    """One race entry taken from the live-stats list.

    Every text attribute is produced by ``remove_tags``, which stringifies its
    argument first; a missing element therefore shows up as the string
    ``"None"`` rather than the ``None`` object.
    """

    # Seconds to wait for the server; without a timeout requests may block
    # indefinitely on an unresponsive connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, raw):
        self.raw = raw
        self.refresh_info()

    def refresh_info(self):
        """Extract title, status, distance to go, situation and url from ``self.raw``."""
        find = self.raw.find
        self.title = self.remove_tags(find(attrs={"class": "title"}))
        self.status = self.remove_tags(find(attrs={"class": "status"}))
        self.togo = self.remove_tags(find(attrs={"class": "togo"}))
        self.situation = self.remove_tags(find(attrs={"class": "situ_txt"}))
        link = find("a", href=True)
        # An entry without a link used to raise TypeError here; map it to the
        # same "None" sentinel the other fields use.
        href = link["href"] if link is not None else None
        self.url = self.remove_tags(href)

    def print_stats(self):
        """Print title and status, plus distance to go and situation when known."""
        print(self.title)
        print(self.status)
        if self.togo != "None":
            print(self.togo, "to go")
            print(self.situation)
        print("===============")

    def print_raw(self):
        """Print the raw element, a blank line, and the race url."""
        print(self.raw)
        print("")
        print(self.url)

    def get_timeline(self):
        """Download the race page and collect its timeline texts in ``self.timeline``.

        ``self.timeline`` is always set to a list. It stays empty when the race
        has no url or the page has no element with class ``timeline3cont``.
        """
        from urllib.parse import urljoin

        self.timeline = []
        if self.url == "None":
            return
        # urljoin copes with hrefs that start with "/" or are already absolute.
        full_url = urljoin("https://www.procyclingstats.com/", self.url)
        response = requests.get(full_url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, "html.parser")
        container = soup.find(attrs={"class": "timeline3cont"})
        if container is None:
            return
        for item in container.find_all("li"):
            stat = item.find(attrs={"class": "stat"})
            if stat is None:
                continue
            stat_content = stat.find(attrs={"class": "textCont"})
            self.timeline.append(self.remove_tags(stat_content))

    def remove_tags(self, text):
        """Return the visible text of ``text`` as one space-joined string.

        ``text`` is converted with ``str()`` and re-parsed as HTML; ``style``
        and ``script`` elements are dropped before the strings are collected.
        """
        text_soup = BeautifulSoup(str(text), "html.parser")
        for data in text_soup(["style", "script"]):
            data.decompose()
        return " ".join(text_soup.stripped_strings)