mirror of https://github.com/stijndcl/didier
restructure scrapers, don't run jpl task on dev
parent
e07a2c28d1
commit
49aaa76aff
|
@ -2,7 +2,7 @@ import discord
|
||||||
from discord.ext import commands
|
from discord.ext import commands
|
||||||
from decorators import help
|
from decorators import help
|
||||||
from enums.help_categories import Category
|
from enums.help_categories import Category
|
||||||
from functions.scraping import google_search
|
from functions.scrapers.google import google_search
|
||||||
|
|
||||||
|
|
||||||
class Google(commands.Cog):
|
class Google(commands.Cog):
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enums.numbers import Numbers
|
||||||
from functions import timeFormatters
|
from functions import timeFormatters
|
||||||
from functions.config import config
|
from functions.config import config
|
||||||
from functions.database import currency, poke, prison, birthdays, stats
|
from functions.database import currency, poke, prison, birthdays, stats
|
||||||
from functions.scraping import getMatchweek
|
from functions.scrapers.sporza import getMatchweek
|
||||||
from functions import ufora_notifications
|
from functions import ufora_notifications
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
|
@ -228,6 +228,10 @@ class Tasks(commands.Cog):
|
||||||
"""
|
"""
|
||||||
Task that checks the current JPL matchweek & changes the dict value
|
Task that checks the current JPL matchweek & changes the dict value
|
||||||
"""
|
"""
|
||||||
|
# Don't run this when testing
|
||||||
|
if self.client.user.id != int(constants.didierId):
|
||||||
|
return
|
||||||
|
|
||||||
matchweek = getMatchweek()
|
matchweek = getMatchweek()
|
||||||
|
|
||||||
if matchweek is None:
|
if matchweek is None:
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from attr import dataclass, field
|
from attr import dataclass, field
|
||||||
from functions.timeFormatters import fromString
|
from functions.timeFormatters import fromString
|
||||||
from functions.scraping import getJPLMatches, getJPLTable
|
from functions.scrapers.sporza import getJPLMatches, getJPLTable
|
||||||
from functions.stringFormatters import leadingZero
|
from functions.stringFormatters import leadingZero
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import tabulate
|
import tabulate
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from requests import get
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
|
||||||
|
def google_search(query, timeout=10):
    """
    Function to get Google search results

    Sends one GET request to Google's search page and extracts the link &
    title from every <div class="g"> element in the returned HTML.

    :param query: the search terms; URL-encoded and sent as the "q" parameter
    :param timeout: seconds to wait for a response before requests raises
        a Timeout, so a stalled connection can't block the caller forever
    :return: a tuple (results, status_code). When the status code is not 200,
        results is None. Otherwise results is a list with one entry per
        result <div>: a (href, title) tuple, or None when that <div> has
        no link or no <h3> title (callers are expected to skip those)
    """
    # Sent along with the request as a desktop Chrome User-Agent
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
    }

    # Keep the caller's argument intact instead of rebinding it
    encoded_query = urlencode({"q": query})

    # Get 20 results in case some of them are None
    resp = get(
        "https://www.google.com/search?{}&num=20&hl=en".format(encoded_query),
        headers=headers,
        timeout=timeout
    )

    if resp.status_code != 200:
        return None, resp.status_code

    bs = BeautifulSoup(resp.text, "html.parser")

    def getContent(element):
        """
        Function to find links & titles in the HTML of a <div> element
        Returns a (href, title) tuple, or None if either one is missing
        """
        link = element.find("a", href=True)
        title = element.find("h3")

        if link is None or title is None:
            return None

        return link["href"], title.text

    divs = bs.find_all("div", attrs={"class": "g"})

    return [getContent(d) for d in divs], 200
|
|
@ -1,45 +1,7 @@
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from requests import get
|
from requests import get
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
# TODO add Football requests in here as well
|
|
||||||
|
|
||||||
|
|
||||||
def google_search(query):
|
|
||||||
"""
|
|
||||||
Function to get Google search results
|
|
||||||
"""
|
|
||||||
headers = {
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
|
|
||||||
}
|
|
||||||
|
|
||||||
query = urlencode({"q": query})
|
|
||||||
|
|
||||||
# Get 20 results in case some of them are None
|
|
||||||
resp = get("https://www.google.com/search?{}&num=20&hl=en".format(query), headers=headers)
|
|
||||||
|
|
||||||
if resp.status_code != 200:
|
|
||||||
return None, resp.status_code
|
|
||||||
|
|
||||||
bs = BeautifulSoup(resp.text, "html.parser")
|
|
||||||
|
|
||||||
def getContent(element):
|
|
||||||
"""
|
|
||||||
Function to find links & titles in the HTML of a <div> element
|
|
||||||
"""
|
|
||||||
link = element.find("a", href=True)
|
|
||||||
title = element.find("h3")
|
|
||||||
|
|
||||||
if link is None or title is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return link["href"], title.text
|
|
||||||
|
|
||||||
divs = bs.find_all("div", attrs={"class": "g"})
|
|
||||||
|
|
||||||
return list(getContent(d) for d in divs), 200
|
|
||||||
|
|
||||||
|
|
||||||
def getMatchweek():
|
def getMatchweek():
|
Loading…
Reference in New Issue