restructure scrapers, don't run jpl task on dev

2021-08-08 23:24:16 +02:00 · 2021-08-08 23:24:16 +02:00 · 49aaa76aff
parent e07a2c28d1
commit 49aaa76aff
6 changed files with 46 additions and 42 deletions
--- a/cogs/google.py
+++ b/cogs/google.py
@@ -2,7 +2,7 @@ import discord
 from discord.ext import commands
 from decorators import help
 from enums.help_categories import Category
-from functions.scraping import google_search
+from functions.scrapers.google import google_search
 class Google(commands.Cog):
--- a/cogs/tasks.py
+++ b/cogs/tasks.py
@@ -5,7 +5,7 @@ from enums.numbers import Numbers
 from functions import timeFormatters
 from functions.config import config
 from functions.database import currency, poke, prison, birthdays, stats
-from functions.scraping import getMatchweek
+from functions.scrapers.sporza import getMatchweek
 from functions import ufora_notifications
 import json
 import random
@@ -228,6 +228,10 @@ class Tasks(commands.Cog):
        """
        Task that checks the current JPL matchweek & changes the dict value
        """
        # Don't run this when testing
        if self.client.user.id != int(constants.didierId):
            return
        matchweek = getMatchweek()
        if matchweek is None:
--- a/functions/football.py
+++ b/functions/football.py
@@ -1,7 +1,7 @@
 from enum import Enum
 from attr import dataclass, field
 from functions.timeFormatters import fromString
-from functions.scraping import getJPLMatches, getJPLTable
+from functions.scrapers.sporza import getJPLMatches, getJPLTable
 from functions.stringFormatters import leadingZero
 from datetime import datetime
 import tabulate
--- a/functions/scrapers/__init__.py
+++ b/functions/scrapers/__init__.py
--- a/functions/scrapers/google.py
+++ b/functions/scrapers/google.py
@@ -0,0 +1,38 @@
 from bs4 import BeautifulSoup
 from requests import get
 from urllib.parse import urlencode
 def google_search(query, timeout=10):
     """
     Function to get Google search results

     :param query: the search terms, as a plain (unencoded) string
     :param timeout: seconds to wait for Google before giving up; passed
                     straight to requests.get so a stalled connection can
                     no longer block the caller forever
     :return: a tuple (results, status_code)
              - on a non-200 response: (None, <that status code>)
              - on success: (list, 200), where the list holds one entry per
                <div class="g"> found in the page; each entry is either a
                (href, title) tuple or None when the div has no link or no
                <h3> title. Callers are expected to skip the None entries.
     :raises requests.exceptions.RequestException: propagated from
              requests.get (this includes Timeout once `timeout` elapses)
     """
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
     }

     # Get 20 results in case some of them are None
     # Encoding every parameter in one go yields the same URL as before:
     # q=<query>&num=20&hl=en
     params = urlencode({"q": query, "num": 20, "hl": "en"})

     resp = get("https://www.google.com/search?{}".format(params), headers=headers, timeout=timeout)

     if resp.status_code != 200:
         return None, resp.status_code

     bs = BeautifulSoup(resp.text, "html.parser")

     def getContent(element):
         """
         Function to find links & titles in the HTML of a <div> element
         Returns (href, title text), or None if either part is missing
         """
         link = element.find("a", href=True)
         title = element.find("h3")

         if link is None or title is None:
             return None

         return link["href"], title.text

     # NOTE(review): class "g" is presumably Google's per-result container;
     # verify against the current result page markup
     divs = bs.find_all("div", attrs={"class": "g"})

     return [getContent(d) for d in divs], 200
--- a/functions/scrapers/sporza.py
+++ b/functions/scrapers/sporza.py
@@ -1,45 +1,7 @@
 from bs4 import BeautifulSoup
 import re
 from requests import get
 from urllib.parse import urlencode
 from bs4 import BeautifulSoup
 # TODO add Football requests in here as well
 def google_search(query, timeout=10):
     """
     Function to get Google search results

     :param query: the search terms, as a plain (unencoded) string
     :param timeout: seconds to wait for Google before giving up; passed
                     straight to requests.get so a stalled connection can
                     no longer block the caller forever
     :return: a tuple (results, status_code)
              - on a non-200 response: (None, <that status code>)
              - on success: (list, 200), where the list holds one entry per
                <div class="g"> found in the page; each entry is either a
                (href, title) tuple or None when the div has no link or no
                <h3> title. Callers are expected to skip the None entries.
     :raises requests.exceptions.RequestException: propagated from
              requests.get (this includes Timeout once `timeout` elapses)
     """
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
     }

     # Get 20 results in case some of them are None
     # Encoding every parameter in one go yields the same URL as before:
     # q=<query>&num=20&hl=en
     params = urlencode({"q": query, "num": 20, "hl": "en"})

     resp = get("https://www.google.com/search?{}".format(params), headers=headers, timeout=timeout)

     if resp.status_code != 200:
         return None, resp.status_code

     bs = BeautifulSoup(resp.text, "html.parser")

     def getContent(element):
         """
         Function to find links & titles in the HTML of a <div> element
         Returns (href, title text), or None if either part is missing
         """
         link = element.find("a", href=True)
         title = element.find("h3")

         if link is None or title is None:
             return None

         return link["href"], title.text

     # NOTE(review): class "g" is presumably Google's per-result container;
     # verify against the current result page markup
     divs = bs.find_all("div", attrs={"class": "g"})

     return [getContent(d) for d in divs], 200
 def getMatchweek():