mirror of
https://github.com/stijndcl/didier.git
synced 2026-04-07 15:48:29 +02:00
Scrape current jpl matchweek every few hours
This commit is contained in:
parent
d9d8c6a842
commit
2b96f3ec41
3 changed files with 46 additions and 4 deletions
|
|
@ -1,3 +1,5 @@
|
|||
import re
|
||||
|
||||
from requests import get
|
||||
from urllib.parse import urlencode
|
||||
from bs4 import BeautifulSoup
|
||||
|
|
def getMatchweek():
    """
    Parse the current JPL (Jupiler Pro League) matchweek out of Sporza's site.

    Returns the matchweek number as a string of digits (e.g. "12"), or None
    when the page can't be fetched or doesn't have the expected layout.
    """
    # Imported locally because the module only pulls `get` out of requests
    from requests import RequestException

    try:
        # Without a timeout a stalled connection would block this call forever
        resp = get("https://sporza.be/nl/categorie/voetbal/jupiler-pro-league/", timeout=10)
    except RequestException:
        # Treat network failures like every other failure here: no result
        return None

    if resp.status_code != 200:
        return None

    bs = BeautifulSoup(resp.text, "html.parser")
    matchdays = bs.find_all("section", attrs={"class": "sc-matchdays"})

    # The second "sc-matchdays" section is the one that gets searched
    if len(matchdays) < 2:
        return None

    # Table header
    header = matchdays[1]

    # Capture the digits directly instead of splitting the match on a single
    # space: the pattern allows any amount (or kind) of whitespace between
    # "speeldag" and the number, which made the split raise IndexError or
    # return an empty string
    match = re.search(r"speeldag\s*(\d+)", str(header), flags=re.I)

    # Something went wrong, just ignore
    if match is None:
        return None

    return match.group(1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue