From ed0649c953cff2b339d68f6212fccc957091dd9d Mon Sep 17 00:00:00 2001 From: Stijn De Clercq Date: Tue, 31 Aug 2021 00:13:27 +0200 Subject: [PATCH] Clean google search up a bit --- cogs/google.py | 39 ++++++++++++++---------------------- functions/scrapers/google.py | 27 +++++++++++++++++++++---- 2 files changed, 38 insertions(+), 28 deletions(-) diff --git a/cogs/google.py b/cogs/google.py index 2bc8bfb..310403d 100644 --- a/cogs/google.py +++ b/cogs/google.py @@ -3,29 +3,24 @@ from discord.ext import commands from dislash import slash_command, SlashInteraction, Option, OptionType from decorators import help from enums.help_categories import Category -from functions.scrapers.google import google_search +from functions.scrapers.google import google_search, SearchResult -def _create_google_embed(results) -> discord.Embed: - # Filter out all Nones - elements = list(filter(lambda x: x is not None, results)) - +def _create_google_embed(result: SearchResult) -> discord.Embed: embed = discord.Embed(colour=discord.Colour.blue()) embed.set_author(name="Google Search") # Empty list of results - if len(elements) == 0: + if len(result.results) == 0: + embed.colour = discord.Colour.red() embed.description = "Geen resultaten gevonden." return embed - # Cut excess results out - if len(elements) > 10: - elements = elements[:10] - + # Add results into a field links = [] - for index, (link, title) in enumerate(elements): - links.append("{}: [{}]({})".format(index + 1, title, link)) + for index, link in enumerate(result.results): + links.append(f"{index + 1}: {link}") embed.description = "\n".join(links) @@ -48,30 +43,26 @@ class Google(commands.Cog): guild_ids=[880175869841277008] ) async def _google_slash(self, interaction: SlashInteraction, query: str): - results, status = google_search(query) + result = google_search(query) - if results is None: - return await interaction.reply("Er ging iets fout (Response {})".format(status)) + if not result.results: + return await interaction.reply("Er ging iets fout (Response {})".format(result.status_code)) - embed = _create_google_embed(results) + embed = _create_google_embed(result) await interaction.reply(embed=embed) - @slash_command(name="test", description="Test") - async def test(self, interaction): - await interaction.reply(":eyes:") - @commands.command(name="Google", aliases=["Gtfm", "Search"], usage="[Query]", case_insensitive=True) @help.Category(Category.Other) async def google(self, ctx, *query): if not query: return await ctx.reply("Je hebt geen query opgegeven.", mention_author=True) - results, status = google_search(" ".join(query)) + result = google_search(" ".join(query)) - if results is None: - return await ctx.send("Er ging iets fout (Response {})".format(status)) + if not result.results: + return await ctx.send("Er ging iets fout (Response {})".format(result.status_code)) - embed = _create_google_embed(results) + embed = _create_google_embed(result) await ctx.reply(embed=embed, mention_author=False) diff --git a/functions/scrapers/google.py b/functions/scrapers/google.py index 4b7aefa..71fec5d 100644 --- a/functions/scrapers/google.py +++ b/functions/scrapers/google.py @@ -1,9 +1,18 @@ +from typing import Optional, List + from bs4 import BeautifulSoup +from dataclasses import dataclass from requests import get from urllib.parse import urlencode -def google_search(query): +@dataclass +class SearchResult: + status_code: int + results: List[str] + + +def google_search(query) -> SearchResult: """ Function to get Google search results """ @@ -17,7 +26,7 @@ def google_search(query): resp = get("https://www.google.com/search?{}&num=20&hl=en".format(query), headers=headers) if resp.status_code != 200: - return None, resp.status_code + return SearchResult(resp.status_code, []) bs = BeautifulSoup(resp.text, "html.parser") @@ -28,11 +37,21 @@ def google_search(query): link = element.find("a", href=True) title = element.find("h3") - if link is None or title is None: + if link is None or not link["href"].startswith(("http://", "https://",)) or title is None: return None return link["href"], title.text divs = bs.find_all("div", attrs={"class": "g"}) - return list(getContent(d) for d in divs), 200 + results = list(getContent(d) for d in divs) + + # Filter out Nones + results = list(filter(lambda x: x is not None, results)) + + # Map to urls + links = [] + for (link, title) in results: + links.append(f"[{title}]({link})") + + return SearchResult(200, links[:10])