diff options
| author | kartofen <mladenovnasko0@gmail.com> | 2022-11-27 19:05:00 +0200 | 
|---|---|---|
| committer | kartofen <mladenovnasko0@gmail.com> | 2022-11-27 19:05:00 +0200 | 
| commit | 35d9b06e872cb35858afb9039a7b30400590d990 (patch) | |
| tree | 0bb340e146ca45132d57f79b7624f8e9ebbe8fad | |
| parent | 4e96a8a67e1e9a4f85038f09ea77ec3645f85e73 (diff) | |
| -rwxr-xr-x | scraper.py | 16 | 
1 file changed, 12 insertions, 4 deletions
| @@ -1,7 +1,9 @@  #!/usr/bin/python3 +import sys  import time  import json  import requests +  from datetime import datetime  from datetime import timedelta @@ -14,6 +16,10 @@ from bs4 import BeautifulSoup  url = 'https://www.foxsports.com/soccer/2022-fifa-world-cup/scores?date=2022-11-20'  year = '2022' +if len(sys.argv) != 3: +    print("You need to provide 2 arguments: filename and time between scrapes") +    exit(1) +  def day_at(when):      date = datetime.now()      if when == "Yesterday": @@ -63,9 +69,9 @@ options = Options()  options.headless = True  driver = webdriver.Chrome(options=options, executable_path='chromedriver') -def scrape(): +def scrape(filename = ""):      driver.get(url) -    time.sleep(5) +    time.sleep(6)      soup = BeautifulSoup(driver.page_source, "lxml")      days = [] @@ -83,6 +89,8 @@ def scrape():          days.append(Day(date, matches)) -    write_as_json("", days, 2) +    write_as_json(filename, days, 2) -scrape() +while 1: +    scrape(sys.argv[1]) +    time.sleep(int(sys.argv[2])) | 
