Scrape current jpl matchweek every few hours

2026-07-16 17:59:56 +02:00 · 2021-01-24 22:31:09 +01:00 · 2021-01-24 22:31:09 +01:00 · 2b96f3ec41
commit 2b96f3ec41
parent d9d8c6a842
3 changed files with 46 additions and 4 deletions
--- a/functions/scraping.py
+++ b/functions/scraping.py
@ -1,3 +1,5 @@
+import re
+
 from requests import get
 from urllib.parse import urlencode
 from bs4 import BeautifulSoup
@ -49,4 +51,28 @@ def getMatchweek():
    """
    Parses the current JPL matchweek out of Sporza's site
    """
-    pass
+    resp = get("https://sporza.be/nl/categorie/voetbal/jupiler-pro-league/")
+
+    if resp.status_code != 200:
+        return None
+
+    bs = BeautifulSoup(resp.text, "html.parser")
+    matchdays = bs.find_all("section", attrs={"class": "sc-matchdays"})
+
+    if len(matchdays) < 2:
+        return None
+
+    # Table header
+    header = matchdays[1]
+
+    # Capture the digits directly so the result does not depend on how much
+    # (or which kind of) whitespace separates "speeldag" from the number
+    r = re.compile(r"speeldag\s*(\d+)", flags=re.I)
+    match = r.search(str(header))
+
+    # Something went wrong, just ignore
+    if match is None:
+        return None
+
+    # Group 1 is the matchweek number, still as a string of digits
+    return match[1]