restructure scrapers, don't run jpl task on dev

pull/85/head
Stijn De Clercq 2021-08-08 23:24:16 +02:00
parent e07a2c28d1
commit 49aaa76aff
6 changed files with 46 additions and 42 deletions

@@ -2,7 +2,7 @@ import discord
 from discord.ext import commands
 from decorators import help
 from enums.help_categories import Category
-from functions.scraping import google_search
+from functions.scrapers.google import google_search


 class Google(commands.Cog):

@@ -5,7 +5,7 @@ from enums.numbers import Numbers
 from functions import timeFormatters
 from functions.config import config
 from functions.database import currency, poke, prison, birthdays, stats
-from functions.scraping import getMatchweek
+from functions.scrapers.sporza import getMatchweek
 from functions import ufora_notifications
 import json
 import random
@@ -228,6 +228,10 @@ class Tasks(commands.Cog):
         """
         Task that checks the current JPL matchweek & changes the dict value
         """
+        # Don't run this when testing
+        if self.client.user.id != int(constants.didierId):
+            return
+
         matchweek = getMatchweek()

         if matchweek is None:
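The guard above keeps the scheduled scrape from running on development bots: it compares the logged-in bot's user id against the production Didier id from constants. A minimal sketch of the same pattern; the task name and loop interval are illustrative, not taken from this diff:

    from discord.ext import commands, tasks

    import constants  # assumed to expose didierId, the production bot's user id


    class Tasks(commands.Cog):
        def __init__(self, client: commands.Bot):
            self.client = client

        @tasks.loop(hours=1)  # illustrative interval
        async def jpl_matchweek_task(self):
            # Don't run this when testing: only the production bot executes the task
            if self.client.user.id != int(constants.didierId):
                return
            # ... scrape & store the current matchweek here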

@@ -1,7 +1,7 @@
 from enum import Enum
 from attr import dataclass, field
 from functions.timeFormatters import fromString
-from functions.scraping import getJPLMatches, getJPLTable
+from functions.scrapers.sporza import getJPLMatches, getJPLTable
 from functions.stringFormatters import leadingZero
 from datetime import datetime
 import tabulate
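All three import changes point into a new functions.scrapers package, split per target site. Judging by the import paths alone, the restructured layout is presumably:

    functions/
        scrapers/
            google.py   # google_search
            sporza.py   # getMatchweek, getJPLMatches, getJPLTable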

@@ -0,0 +1,38 @@
+from bs4 import BeautifulSoup
+from requests import get
+from urllib.parse import urlencode
+
+
+def google_search(query):
+    """
+    Function to get Google search results
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
+    }
+
+    query = urlencode({"q": query})
+
+    # Get 20 results in case some of them are None
+    resp = get("https://www.google.com/search?{}&num=20&hl=en".format(query), headers=headers)
+
+    if resp.status_code != 200:
+        return None, resp.status_code
+
+    bs = BeautifulSoup(resp.text, "html.parser")
+
+    def getContent(element):
+        """
+        Function to find links & titles in the HTML of a <div> element
+        """
+        link = element.find("a", href=True)
+        title = element.find("h3")
+
+        if link is None or title is None:
+            return None
+
+        return link["href"], title.text
+
+    divs = bs.find_all("div", attrs={"class": "g"})
+
+    return list(getContent(d) for d in divs), 200
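A caller-side sketch for the new module (the query string is illustrative). As the function above shows, google_search returns (None, status) on a non-200 response, otherwise a list whose entries can themselves be None, paired with 200:

    from functions.scrapers.google import google_search

    results, status = google_search("didier discord bot")

    if results is None:
        print("Google returned HTTP {}".format(status))
    else:
        # Drop the None entries produced by divs without a link or title
        for url, title in filter(None, results):
            print(title, url)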

@@ -1,45 +1,7 @@
-from bs4 import BeautifulSoup
 import re
 from requests import get
 from urllib.parse import urlencode
+from bs4 import BeautifulSoup
-
-# TODO add Football requests in here as well
-
-
-def google_search(query):
-    """
-    Function to get Google search results
-    """
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
-    }
-
-    query = urlencode({"q": query})
-
-    # Get 20 results in case some of them are None
-    resp = get("https://www.google.com/search?{}&num=20&hl=en".format(query), headers=headers)
-
-    if resp.status_code != 200:
-        return None, resp.status_code
-
-    bs = BeautifulSoup(resp.text, "html.parser")
-
-    def getContent(element):
-        """
-        Function to find links & titles in the HTML of a <div> element
-        """
-        link = element.find("a", href=True)
-        title = element.find("h3")
-
-        if link is None or title is None:
-            return None
-
-        return link["href"], title.text
-
-    divs = bs.find_all("div", attrs={"class": "g"})
-
-    return list(getContent(d) for d in divs), 200
-
-
 def getMatchweek():
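getMatchweek stays in this file, which presumably becomes functions/scrapers/sporza.py alongside the other football scrapers. A caller-side sketch, assuming only what the Tasks hunk above shows (a None return signals a failed scrape):

    from functions.scrapers.sporza import getMatchweek

    matchweek = getMatchweek()

    if matchweek is None:
        # Scrape failed; leave the stored matchweek untouched
        pass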