Added retry; should work now

master
Jef Roosens 2021-01-19 18:47:59 +01:00
parent 8541eb65aa
commit 572426f3e3
2 changed files with 14 additions and 5 deletions


@@ -1,3 +1,9 @@
 # 4kwallpapers-scraper
 A Python script to scrape wallpapers from https://4kwallpapers.com/.
+
+## Usage
+The program takes a single command-line argument, namely the root folder
+where you want all the pictures to go. It'll create sub-folders for all the
+categories and download all pictures in one go. Yes, this can take a while
+(15m in my case, with a fast internet connection).
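
The README doesn't show a concrete invocation. Assuming the entry point is called scraper.py (the actual file name isn't visible in this diff), a run would look roughly like:

```
# Hypothetical invocation; the script name and target folder are assumptions.
python scraper.py ~/Pictures/wallpapers
```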


@@ -19,11 +19,14 @@ class PageHandler:
     def url(self):
         return urlunsplit(("https", self.domain, self.path, "", ""))
 
-    def get(self, path="", query="", soup=True):
+    def get(self, path="", query="", soup=True, max_tries=5):
         r = requests.get(self.relative_url(path, query), allow_redirects=True)
 
-        if r.status_code != 200:
-            raise requests.exceptions.RequestException()
+        if r.status_code >= 400 and max_tries > 0:
+            return self.get(path, query, soup, max_tries - 1)
+
+        elif r.status_code != 200:
+            raise requests.exceptions.RequestException()
 
         if soup:
             return BeautifulSoup(r.content, "html.parser")
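
The retry added here is recursive: on any 4xx/5xx response, get() calls itself with max_tries decremented and only raises once the counter runs out. For illustration only, the same bounded-retry behaviour written as a standalone iterative helper; get_with_retry and its url parameter are hypothetical and not part of the commit:

```python
import requests
from bs4 import BeautifulSoup


def get_with_retry(url, soup=True, max_tries=5):
    # Issue the request up to max_tries + 1 times, stopping early on success.
    for _ in range(max_tries + 1):
        r = requests.get(url, allow_redirects=True)
        if r.status_code < 400:
            break

    # Mirror the original behaviour: anything other than a 200 at this point is an error.
    if r.status_code != 200:
        raise requests.exceptions.RequestException()

    return BeautifulSoup(r.content, "html.parser") if soup else r
```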
@@ -114,7 +117,7 @@ class Category(PageHandler):
         return self._images
 
     def download(self, dir_path):
-        dir_path = os.path.join(dir_path, self.name)
+        dir_path = os.path.join(dir_path, self.name.replace("/", "-"))
         os.makedirs(dir_path, exist_ok=True)
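
The second change guards against category names that contain a slash, which would otherwise be turned into nested directories. A quick sketch of why the replace() matters; the category name here is made up for illustration:

```python
import os

# "Cars/Motorcycles" is a hypothetical category name, not one from the site.
name = "Cars/Motorcycles"
print(os.path.join("wallpapers", name))                    # wallpapers/Cars/Motorcycles (nested!)
print(os.path.join("wallpapers", name.replace("/", "-")))  # wallpapers/Cars-Motorcycles
```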