Big Bang

author: kartofen <mladenovnasko0@gmail.com> 2022-11-27 18:43:13 +0200
committer: kartofen <mladenovnasko0@gmail.com> 2022-11-27 18:43:13 +0200
commit: 4e96a8a67e1e9a4f85038f09ea77ec3645f85e73 (patch)
tree: d7b5e762910fa48e4026d6e02af94c5218d669a2
1 files changed, 88 insertions, 0 deletions
diff --git a/scraper.py b/scraper.py
new file mode 100755
index 0000000..a7a5c50
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,88 @@
+#!/usr/bin/python3
+import time
+import json
+import requests
+from datetime import datetime
+from datetime import timedelta
+
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+
+from bs4.element import Comment
+from bs4 import BeautifulSoup
+
+url = 'https://www.foxsports.com/soccer/2022-fifa-world-cup/scores?date=2022-11-20'  # page loaded by scrape()
+year = '2022'  # NOTE(review): not referenced anywhere else in this patch
+
+def day_at(when):
+    """Format today's date, or an adjacent day, like "Sun, Nov 20".
+
+    "Yesterday"/"Tomorrow" shift by one day; any other label means today.
+    """
+    shift = {"Yesterday": -1, "Tomorrow": 1}.get(when, 0)
+    return (datetime.now() + timedelta(days=shift)).strftime("%a, %b %d")
+
+def write_as_json(filename, data, indent = None):
+    """Emit {"updated": <unix seconds>, "days": data} as JSON: printed to stdout
+    when `filename` is "", otherwise written to that file (overwriting it)."""
+    payload = {"updated": int(datetime.now().timestamp()), "days": data}
+    # Objects json cannot encode natively are serialized through their __dict__.
+    if filename == "":
+        print(json.dumps(payload, default=lambda o: o.__dict__, indent=indent))
+        return
+    with open(filename, "w") as f:  # closed even if serialization raises
+        json.dump(payload, f, default=lambda o: o.__dict__, indent=indent)
+
+
+class Day:
+    """One match day: `day` is its display date, `matches` its list of matches."""
+    def __init__(self, day, matches):
+        self.day, self.matches = day, matches
+
+class Match:
+    """One fixture built from the child nodes of a ".score-chip-content" element."""
+    def __init__(self, matches):
+        rows = matches[1].div.contents  # rows[0] / rows[1] hold the two teams
+        self.group = matches[0].text.lower().title()
+        self.team1 = rows[0].contents[1].span.text.lower().title()
+        self.team2 = rows[1].contents[1].span.text.lower().title()
+        slot1, slot2 = rows[0].contents[3], rows[1].contents[3]  # Comment node => no score element, use 0
+        self.score1 = 0 if isinstance(slot1, Comment) else int(slot1.span.text)
+        self.score2 = 0 if isinstance(slot2, Comment) else int(slot2.span.text)
+        clock, state = matches[1].contents[1].text, matches[2].span.text  # `clock`: don't shadow the time module
+        self.time = clock or ("Ended" if state == "FINAL" else state)
+
+    def print(self):
+        """Print the group, each team with its score, and the time, one per line."""
+        print(self.group)
+        print(self.team1, self.score1)
+        print(self.team2, self.score2)
+        print(self.time)
+
+options = Options()
+options.headless = True  # run Chrome without a window; NOTE(review): deprecated in newer Selenium 4 releases - confirm
+driver = webdriver.Chrome(options=options, executable_path='chromedriver')  # module-level driver used by scrape()
+
+def scrape():
+    """Load `url` in the module-level driver, parse every day section, print the result as JSON."""
+    driver.get(url)
+    time.sleep(5)  # fixed pause before reading page_source (presumably lets the page render)
+    soup = BeautifulSoup(driver.page_source, "lxml")
+    days = []
+    for section in soup.find_all('div', class_="score-data score-section header-pinned"):
+        fixtures = [Match(chip.contents) for chip in section.select(".score-chip-content")]
+        if isinstance(section.div.string, Comment):
+            label = "Sun, Nov 20"  # this section has no date in its header
+        else:
+            label = section.div.div.text.lower().title()  # header text is all caps
+        if len(label.split()) < 3:  # "Today"/"Tomorrow"/"Yesterday" rather than a date
+            label = day_at(label)
+        days.append(Day(label, fixtures))
+
+    write_as_json("", days, 2)
+
+
+try:
+    scrape()
+finally:
+    driver.quit()  # always end the browser session, even if scraping raised
author	kartofen <mladenovnasko0@gmail.com>	2022-11-27 18:43:13 +0200
committer	kartofen <mladenovnasko0@gmail.com>	2022-11-27 18:43:13 +0200
commit	4e96a8a67e1e9a4f85038f09ea77ec3645f85e73 (patch)
tree	d7b5e762910fa48e4026d6e02af94c5218d669a2