Added retry; should work now

2021-01-19 18:47:59 +01:00 · 2021-01-19 18:47:59 +01:00 · 572426f3e3
parent 8541eb65aa
commit 572426f3e3
2 changed files with 14 additions and 5 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +1,9 @@
 # 4kwallpapers-scraper

-A Python script to scrape wallpapers from https://4kwallpapers.com/.
+A Python script to scrape wallpapers from https://4kwallpapers.com/.
+
+## Usage
+The program takes a single command line argument, namely the root folder
+where you want all the pictures to go. It'll create sub-folders for all the
+categories, and download all pictures in one go. Yes, this can take a while
+(15m in my case, with a fast internet connection).
--- a/scraper.py
+++ b/scraper.py
@ -19,11 +19,14 @@ class PageHandler:
    def url(self):
        return urlunsplit(("https", self.domain, self.path, "", ""))

-    def get(self, path="", query="", soup=True):
+    def get(self, path="", query="", soup=True, max_tries=5):
        r = requests.get(self.relative_url(path, query), allow_redirects=True)

-        if r.status_code != 200:
-            raise requests.exceptions.RequestException()
+        if r.status_code >= 400 and max_tries > 0:
+            return self.get(path, query, soup, max_tries - 1)
+
+        elif r.status_code != 200:
+            raise requests.exceptions.RequestError()

        if soup:
            return BeautifulSoup(r.content, "html.parser")
@ -114,7 +117,7 @@ class Category(PageHandler):
        return self._images

    def download(self, dir_path):
-        dir_path = os.path.join(dir_path, self.name)
+        dir_path = os.path.join(dir_path, self.name.replace("/", "-"))

        os.makedirs(dir_path, exist_ok=True)