#!/usr/bin/python3
"""Scrape FIFA World Cup 2022 scores from foxsports.com and emit them as JSON.

The page is rendered with headless Chrome (Selenium), parsed with
BeautifulSoup, and the resulting days/matches are printed to stdout as JSON.
"""
import time
import json
import requests
from datetime import datetime
from datetime import timedelta
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4.element import Comment
from bs4 import BeautifulSoup

url = 'https://www.foxsports.com/soccer/2022-fifa-world-cup/scores?date=2022-11-20'
year = '2022'


def day_at(when):
    """Return a date label such as ``"Sun, Nov 20"`` for a relative day name.

    ``"Yesterday"`` and ``"Tomorrow"`` are offset by one day from the current
    local date; any other value yields today's date.
    """
    date = datetime.now()
    if when == "Yesterday":
        date -= timedelta(days=1)
    elif when == "Tomorrow":
        date += timedelta(days=1)
    return date.strftime("%a, %b %d")


def write_as_json(filename, data, indent = None):
    """Serialize *data* wrapped in an ``{"updated", "days"}`` envelope.

    Args:
        filename: Destination path. An empty string prints the JSON to stdout
            instead of writing a file.
        data: The list of days to store under the ``"days"`` key. Objects that
            json cannot encode natively are serialized via their ``__dict__``.
        indent: Passed through to ``json.dump`` / ``json.dumps``.
    """
    payload = {"updated": int(datetime.now().timestamp()), "days": data}

    def as_dict(obj):
        return obj.__dict__

    if filename == "":
        # No target file: emit the document on stdout.
        print(json.dumps(payload, default=as_dict, indent=indent))
        return
    # Context manager closes the file even when serialization fails.
    with open(filename, "w") as f:
        json.dump(payload, f, default=as_dict, indent=indent)


class Day:
    """A date label together with the matches listed under it."""

    def __init__(self, day, matches):
        self.day = day
        self.matches = matches


class Match:
    """One match parsed from the children of a ``.score-chip-content`` node."""

    def __init__(self, matches):
        """Build a match from *matches*, the ``contents`` list of a score chip.

        Attribute assignment order is kept stable because instances are
        serialized through ``__dict__``.
        """
        data = matches[1].div.contents
        self.group = matches[0].text.lower().title()
        # NOTE(review): the whitespace-collapsed original had a '#' directly
        # before this assignment; it is treated as live code because print()
        # reads self.team1 -- confirm against the upstream file.
        self.team1 = data[0].contents[1].span.text.lower().title()
        self.team2 = data[1].contents[1].span.text.lower().title()
        score1 = data[0].contents[3]
        score2 = data[1].contents[3]
        # When the score slot holds an HTML comment instead of an element,
        # the score is recorded as 0.
        self.score1 = 0 if isinstance(score1, Comment) else int(score1.span.text)
        self.score2 = 0 if isinstance(score2, Comment) else int(score2.span.text)
        # Local is not called "time" so the imported time module stays visible.
        kickoff = matches[1].contents[1].text
        state = matches[2].span.text
        if kickoff != "":
            self.time = kickoff
        else:
            self.time = state if state != "FINAL" else "Ended"

    def print(self):
        """Print the group, both teams with their scores, and the time."""
        print(self.group)
        print(self.team1, str(self.score1))
        print(self.team2, str(self.score2))
        print(self.time)


options = Options()
# add_argument works on every Selenium release; the `headless` property was
# deprecated and later removed in Selenium 4, where assigning it has no effect.
options.add_argument("--headless")
# 'chromedriver' was the default executable_path on older Selenium releases,
# and the keyword itself is rejected by Selenium >= 4.10, so it is omitted.
driver = webdriver.Chrome(options=options)


def scrape():
    """Load the scores page, parse every day section and print it as JSON."""
    driver.get(url)
    # Give the page's scripts time to render the scores before reading the DOM.
    time.sleep(5)
    soup = BeautifulSoup(driver.page_source, "lxml")
    days = []
    for day in soup.find_all('div', class_="score-data score-section header-pinned"):
        matches = [Match(chip.contents) for chip in day.select(".score-chip-content")]
        # Fallback used when the section header carries no date.
        date = "Sun, Nov 20"
        if not isinstance(day.div.string, Comment):
            # Header text is all caps on the page; normalize to title case.
            date = day.div.div.text.lower().title()
            if len(date.split()) < 3:
                # Fewer than three words means a relative label
                # (today / tomorrow / yesterday) rather than a full date.
                date = day_at(date)
        days.append(Day(date, matches))
    write_as_json("", days, 2)


try:
    scrape()
finally:
    # Always shut the browser down so no Chrome/chromedriver process is leaked.
    driver.quit()