From 572426f3e37257c3810522bdeb8b8725fdc93bf5 Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Tue, 19 Jan 2021 18:47:59 +0100
Subject: [PATCH] Added retry; should work now

---
 README.md  | 8 +++++++-
 scraper.py | 11 +++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index fe7f84a..743cbaa 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,9 @@
 # 4kwallpapers-scraper
 
-A Python script to scrape wallpapers from https://4kwallpapers.com/.
\ No newline at end of file
+A Python script to scrape wallpapers from https://4kwallpapers.com/.
+
+## Usage
+The program takes a single command line argument: the root folder where
+you want all the pictures to go. It'll create sub-folders for all the
+categories and download all pictures in one go. Yes, this can take a
+while (about 15 minutes in my case, with a fast internet connection).
diff --git a/scraper.py b/scraper.py
index 5af1448..79af377 100644
--- a/scraper.py
+++ b/scraper.py
@@ -19,11 +19,14 @@ class PageHandler:
     def url(self):
         return urlunsplit(("https", self.domain, self.path, "", ""))
 
-    def get(self, path="", query="", soup=True):
+    def get(self, path="", query="", soup=True, max_tries=5):
         r = requests.get(self.relative_url(path, query), allow_redirects=True)
 
-        if r.status_code != 200:
-            raise requests.exceptions.RequestException()
+        if r.status_code >= 400 and max_tries > 0:
+            return self.get(path, query, soup, max_tries - 1)
+
+        elif r.status_code != 200:
+            raise requests.exceptions.RequestException()
 
         if soup:
             return BeautifulSoup(r.content, "html.parser")
@@ -114,7 +117,7 @@ class Category(PageHandler):
         return self._images
 
     def download(self, dir_path):
-        dir_path = os.path.join(dir_path, self.name)
+        dir_path = os.path.join(dir_path, self.name.replace("/", "-"))
 
         os.makedirs(dir_path, exist_ok=True)
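
For context, a minimal standalone sketch of what get() does after this
patch: the PageHandler below is trimmed down, and its __init__ and
relative_url() are assumptions based on the hunk context, not the full
class from scraper.py.

    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlunsplit


    class PageHandler:
        # Trimmed-down stand-in; __init__ and relative_url() are assumed
        # and simplified compared to the real class in scraper.py.
        def __init__(self, domain, path="/"):
            self.domain = domain
            self.path = path

        def relative_url(self, path="", query=""):
            return urlunsplit(("https", self.domain, path or self.path, query, ""))

        def get(self, path="", query="", soup=True, max_tries=5):
            r = requests.get(self.relative_url(path, query), allow_redirects=True)

            # Retry the same request on any 4xx/5xx response, decrementing
            # max_tries each time; any other non-200 status is fatal.
            if r.status_code >= 400 and max_tries > 0:
                return self.get(path, query, soup, max_tries - 1)
            elif r.status_code != 200:
                raise requests.exceptions.RequestException()

            return BeautifulSoup(r.content, "html.parser") if soup else r.content

Because the retry recurses with max_tries - 1, a persistently failing URL
raises after max_tries retries instead of recursing forever; there is no
pause between attempts, so a short time.sleep() before the recursive call
would be a natural refinement if the server rate-limits. The download()
change in the second hunk is independent: replacing "/" with "-" in the
category name keeps os.path.join() from turning a name like "Cars/Bikes"
into a nested directory.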