"""Scrape live race information from procyclingstats.com."""

import time  # not used in the code visible here; kept in case other code relies on it
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

_BASE_URL = "https://www.procyclingstats.com/"
# Without a timeout, requests.get() may block indefinitely on a stalled server.
_REQUEST_TIMEOUT = 10  # seconds


class LiveStats:
    """Live-races panel (class ``hp3-livestats``) of the site's home page.

    Attributes:
        live: the parsed panel element (empty document if the panel is absent).
        races: list of ``Race`` objects, one per ``<li>`` found in the panel.
    """

    def __init__(self):
        self.refresh_live()
        self.get_races()
        # Not used by LiveStats itself; kept so existing attribute access
        # on instances keeps working.
        self.timeline = []
        self.situation_long = []

    def refresh_live(self):
        """Download the home page and store its live-stats panel in ``self.live``.

        Elements with class ``inverse`` are removed from the panel. If the page
        has no ``hp3-livestats`` element, ``self.live`` becomes an empty
        document so that ``get_races`` yields an empty list instead of failing.
        """
        response = requests.get(_BASE_URL, timeout=_REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, "html.parser")
        panel = soup.find(attrs={"class": "hp3-livestats"})
        if panel is None:
            panel = BeautifulSoup("", "html.parser")
        for tag in panel.find_all(attrs={"class": "inverse"}):
            tag.decompose()
        self.live = panel

    def get_races(self):
        """Rebuild ``self.races`` from every ``<li>`` inside ``self.live``."""
        self.races = [Race(item) for item in self.live.find_all("li")]

    def print_races(self):
        """Print the stats of every race in ``self.races``."""
        for race in self.races:
            race.print_stats()


class Race:
    """One entry of the live-stats panel.

    Text fields are produced by ``remove_tags``; a field whose element is
    missing therefore holds the literal string ``"None"``.
    """

    def __init__(self, raw):
        self.raw = raw
        self.refresh_info()

    def refresh_info(self):
        """Re-read title, status, distance to go, situation and URL from ``self.raw``."""
        raw = self.raw
        self.title = self.remove_tags(raw.find(attrs={"class": "title"}))
        self.status = self.remove_tags(raw.find(attrs={"class": "status"}))
        self.togo = self.remove_tags(raw.find(attrs={"class": "togo"}))
        self.situation = self.remove_tags(raw.find(attrs={"class": "situ_txt"}))
        link = raw.find("a", href=True)
        # An entry without a link must yield the "None" sentinel that
        # get_race_page() checks for, rather than raising TypeError on None["href"].
        href = link["href"] if link is not None else None
        self.url = self.remove_tags(href)

    def print_stats(self):
        """Print title, status, distance to go (when present) and situation."""
        print(self.title)
        print(self.status)
        if self.togo != "None":
            print(self.togo, "to go")
        print(self.situation)
        print("===============")

    def print_raw(self):
        """Print the raw element, an empty line, and the race URL."""
        print(self.raw)
        print("")
        print(self.url)

    def get_race_page(self):
        """Download and parse this race's page.

        Returns:
            The parsed page, or ``None`` when the race has no URL.
        """
        if self.url == "None":
            return None
        # urljoin avoids a doubled slash when the href already starts with "/".
        full_url = urljoin(_BASE_URL, self.url)
        response = requests.get(full_url, timeout=_REQUEST_TIMEOUT)
        return BeautifulSoup(response.text, "html.parser")

    def get_timeline(self):
        """Fill ``self.timeline`` with the text of each timeline entry.

        The list is empty when the race has no page or the page has no
        ``timeline3cont`` element. Entries lacking a ``stat`` or ``textCont``
        element are skipped.
        """
        page = self.get_race_page()
        container = None
        if page is not None:
            container = page.find(attrs={"class": "timeline3cont"})
        items = container.find_all("li") if container is not None else []

        entries = []
        for item in items:
            stat = item.find(attrs={"class": "stat"})
            if stat is None:
                continue
            content = stat.find(attrs={"class": "textCont"})
            if content is None:
                continue
            entries.append(self.remove_tags(content))
        self.timeline = entries

    def get_situation_long(self):
        """Fill ``self.situation_long`` with riders grouped by time gap.

        The result maps each time-gap text to a list of unique rider names in
        page order. A row carrying a ``time`` element starts a new group;
        following rows without one belong to the most recent group. Riders
        listed before any time gap are stored under the key ``None``. Rows
        without a rider cell (class ``maxw200``) add no rider. The dict is
        empty when the race has no page or the page has no ``situCont`` element.
        """
        page = self.get_race_page()
        container = None
        if page is not None:
            container = page.find(attrs={"class": "situCont"})
        items = container.find_all("li") if container is not None else []

        groups = {}
        current_gap = None
        for item in items:
            gap_tag = item.find(attrs={"class": "time"})
            if gap_tag is not None:
                current_gap = self.remove_tags(gap_tag)
                groups[current_gap] = []
            rider_tag = item.find(attrs={"class": "maxw200"})
            if rider_tag is None:
                continue
            rider_name = self.remove_tags(rider_tag)
            riders = groups.setdefault(current_gap, [])
            if rider_name not in riders:
                riders.append(rider_name)
        self.situation_long = groups

    def remove_tags(self, text):
        """Return the visible text of ``text`` with markup removed.

        ``text`` is converted with ``str()`` first, so ``None`` yields the
        string ``"None"``. ``<style>`` and ``<script>`` contents are dropped
        and the remaining text fragments are joined with single spaces.
        """
        text_soup = BeautifulSoup(str(text), "html.parser")
        for data in text_soup(["style", "script"]):
            data.decompose()
        return " ".join(text_soup.stripped_strings)