diff --git a/README.md b/README.md
index 2967017..da2e18c 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,35 @@
 # pcs_live
 
-scrapes procyclingstats livestats for use elsewhere
\ No newline at end of file
+Scrapes the procyclingstats.com live stats for use elsewhere.
+
+# setup (windows)
+
+After cloning, set up a virtualenv:
+
+```
+py -3 -m venv .venv
+.venv\Scripts\activate
+```
+
+If PowerShell refuses to run the activation script, run this first:
+
+```
+Set-ExecutionPolicy Unrestricted -Force
+```
+
+Install the dependencies:
+
+```
+pip install beautifulsoup4
+pip install requests
+```
+
+# usage
+
+```
+from pcslive import LiveStats
+
+stats = LiveStats()
+stats.print_races()
+```
+
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..feada44
--- /dev/null
+++ b/example.py
@@ -0,0 +1,13 @@
+from pcslive import LiveStats
+
+stats = LiveStats()
+
+if stats.races:
+    race = stats.races[0]
+    race.get_timeline()
+
+    if race.timeline:
+        print("Latest timeline update from", race.title, ":")
+        print(race.timeline[0])
+    else:
+        print("No timeline updates yet for", race.title)
\ No newline at end of file
diff --git a/pcslive.py b/pcslive.py
new file mode 100644
index 0000000..dd4a184
--- /dev/null
+++ b/pcslive.py
@@ -0,0 +1,131 @@
+"""Scrape live race statistics from the procyclingstats.com homepage."""
+
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+BASE_URL = "https://www.procyclingstats.com/"
+# requests waits indefinitely unless a timeout (in seconds) is given.
+REQUEST_TIMEOUT = 10
+
+
+class LiveStats:
+    """Live-stats section of the procyclingstats homepage.
+
+    Attributes:
+        live: parsed live-stats element, or None if the page has none.
+        races: list of Race objects, one per li item in that element.
+    """
+
+    def __init__(self):
+        self.refresh_live()
+        self.get_races()
+
+    def refresh_live(self):
+        """Download the homepage and store its live-stats element."""
+        req = requests.get(BASE_URL, timeout=REQUEST_TIMEOUT)
+        soup = BeautifulSoup(req.text, "html.parser")
+
+        # find() yields None rather than raising when the element is absent.
+        live = soup.find(attrs={"class": "hp3-livestats"})
+        if live is not None:
+            # Strip every element with class "inverse" from the stored markup.
+            for tag in live.find_all(attrs={"class": "inverse"}):
+                tag.decompose()
+
+        self.live = live
+
+    def get_races(self):
+        """Rebuild self.races from the li items of self.live."""
+        if self.live is None:
+            self.races = []
+            return
+        self.races = [Race(item) for item in self.live.find_all("li")]
+
+    def print_races(self):
+        """Print the stats of every race in self.races."""
+        for race in self.races:
+            race.print_stats()
+
+
+class Race:
+    """One race parsed from a live-stats li element.
+
+    Text attributes hold the string "None" when the matching element is
+    missing, because remove_tags() stringifies its argument.
+    """
+
+    def __init__(self, raw):
+        self.raw = raw
+        # Defined up front so reading it before get_timeline() is safe.
+        self.timeline = []
+        self.refresh_info()
+
+    def refresh_info(self):
+        """Re-read title, status, togo, situation and url from self.raw."""
+        title_r = self.raw.find(attrs={"class": "title"})
+        status_r = self.raw.find(attrs={"class": "status"})
+        togo_r = self.raw.find(attrs={"class": "togo"})
+        situation_r = self.raw.find(attrs={"class": "situ_txt"})
+        url_r = self.raw.find("a", href=True)
+
+        self.title = self.remove_tags(title_r)
+        self.status = self.remove_tags(status_r)
+        self.togo = self.remove_tags(togo_r)
+        self.situation = self.remove_tags(situation_r)
+        # An entry without a link gets the same "None" marker as the other
+        # fields instead of failing on url_r["href"].
+        href = url_r["href"] if url_r is not None else None
+        self.url = self.remove_tags(href)
+
+    def print_stats(self):
+        """Print title, status, distance to go (if any) and situation."""
+        print(self.title)
+        print(self.status)
+        if self.togo != "None":
+            print(self.togo, "to go")
+        print(self.situation)
+        print("===============")
+
+    def print_raw(self):
+        """Print the raw element, a blank line, then the race url."""
+        print(self.raw)
+        print("")
+        print(self.url)
+
+    def get_timeline(self):
+        """Download the race page and collect its timeline texts.
+
+        self.timeline is reset to a list on every call and stays empty when
+        the race has no url or the page has no timeline container.
+        """
+        self.timeline = []
+        if self.url == "None":
+            return
+
+        req = requests.get(BASE_URL + self.url, timeout=REQUEST_TIMEOUT)
+        soup = BeautifulSoup(req.text, "html.parser")
+
+        container = soup.find(attrs={"class": "timeline3cont"})
+        if container is None:
+            return
+
+        for item in container.find_all("li"):
+            stat = item.find(attrs={"class": "stat"})
+            if stat is None:
+                continue
+            stat_content = stat.find(attrs={"class": "textCont"})
+            if stat_content is not None:
+                self.timeline.append(self.remove_tags(stat_content))
+
+    def remove_tags(self, text):
+        """Return the visible text of an HTML fragment, space-joined.
+
+        The argument is passed through str(), so None becomes "None".
+        Contents of style and script elements are discarded.
+        """
+        text_soup = BeautifulSoup(str(text), "html.parser")
+        for data in text_soup(["style", "script"]):
+            data.decompose()
+        return " ".join(text_soup.stripped_strings)
\ No newline at end of file