diff --git a/didier/data/embeds/google/google_search.py b/didier/data/embeds/google/google_search.py
index 605a5fa..9a8eefc 100644
--- a/didier/data/embeds/google/google_search.py
+++ b/didier/data/embeds/google/google_search.py
@@ -1,6 +1,7 @@
 from http import HTTPStatus
 
 import discord
+from overrides import overrides
 
 from didier.data.embeds.base import EmbedBaseModel
 from didier.data.scrapers.google import SearchData
@@ -31,6 +32,7 @@ class GoogleSearch(EmbedBaseModel):
 
         return embed
 
+    @overrides
     def to_embed(self) -> discord.Embed:
         if not self.data.results or self.data.status_code != HTTPStatus.OK:
             return self._error_embed()
diff --git a/didier/data/scrapers/google.py b/didier/data/scrapers/google.py
index d9a34c2..9ebb003 100644
--- a/didier/data/scrapers/google.py
+++ b/didier/data/scrapers/google.py
@@ -1,4 +1,5 @@
 import http
+import typing
 from dataclasses import dataclass, field
 from typing import Optional
 from urllib.parse import unquote_plus, urlencode
@@ -28,19 +29,25 @@ def get_result_stats(bs: BeautifulSoup) -> Optional[str]:
 
     Example result: "About 16.570.000 results (0,84 seconds)"
     """
-    stats = bs.find("div", id="result-stats").text
-    return stats and stats.removesuffix("\xa0")
+    stats = bs.find("div", id="result-stats")
+    if stats is None:
+        return None
+
+    return stats.text.removesuffix("\xa0")
 
 
 def parse_result(element: Tag) -> Optional[str]:
     """Parse 1 wrapper into a link"""
     a_tag = element.find("a", href=True)
-    url = a_tag["href"]
-    title = a_tag.find("h3")
+    if a_tag is None:
+        return None
+
+    url = a_tag["href"]  # type: ignore
+    title = typing.cast(Tag, a_tag.find("h3"))
 
     if (
         url is None
-        or not url.startswith(
+        or not str(url).startswith(
             (
                 "http://",
                 "https://",
@@ -57,7 +64,8 @@ def get_search_results(bs: BeautifulSoup) -> list[str]:
     """Parse the search results"""
 
     result_wrappers = bs.find_all("div", class_="g")
-    results = filter(lambda x: x is not None, map(parse_result, result_wrappers))
+
+    results: list[str] = list(result for result in map(parse_result, result_wrappers) if result is not None)
 
     # Remove duplicates
     # (sets don't preserve the order!)
@@ -67,7 +75,8 @@ async def google_search(http_client: ClientSession, query: str):
     """Get the first 10 Google search results"""
 
     headers = {
-        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/83.0.4103.97 Safari/537.36"
     }
 
     query = urlencode({"q": query})
@@ -82,4 +91,4 @@
         result_stats = get_result_stats(bs)
         results = get_search_results(bs)
 
-        return SearchData(query, 200, results[:10], result_stats)
+        return SearchData(query, 200, results[:10], result_stats or "")
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 533740c..d82fde3 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -6,6 +6,7 @@ pytest==7.1.2
 pytest-asyncio==0.18.3
 pytest-env==0.6.2
 sqlalchemy2-stubs==0.0.2a23
+types-beautifulsoup4==4.11.3
 types-pytz==2021.3.8
 
 # Flake8 + plugins
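For context, a minimal usage sketch of the patched `google_search` coroutine. The `results` and `status_code` fields of `SearchData` are confirmed by the embed code in the first file; the `result_stats` attribute name and the example query are assumptions inferred from the constructor call in the last hunk, not part of the patch.

```python
# Minimal usage sketch (not part of the patch).
# Assumes SearchData exposes .status_code, .results and .result_stats,
# as suggested by the embed code and constructor call in the diff above.
import asyncio
from http import HTTPStatus

from aiohttp import ClientSession

from didier.data.scrapers.google import google_search


async def main() -> None:
    async with ClientSession() as http_client:
        data = await google_search(http_client, "beautifulsoup find_all example")

        if data.status_code != HTTPStatus.OK or not data.results:
            print("Search failed or returned no results")
            return

        # Per the last hunk, result_stats is now "" rather than None when missing
        print(data.result_stats)
        for url in data.results:
            print(url)


if __name__ == "__main__":
    asyncio.run(main())
```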