Added retry; should work now
parent 8541eb65aa
commit 572426f3e3
@@ -1,3 +1,9 @@
 # 4kwallpapers-scraper
 
 A Python script to scrape wallpapers from https://4kwallpapers.com/.
+
+## Usage
+
+The program takes a single command line argument, namely the root folder where
+you want all the pictures to go. It'll create sub-folders for all the
+categories and download all pictures in one go. Yes, this can take a while (15m in my case, with a fast internet connection).
 scraper.py | 11 +++++++----
@@ -19,11 +19,14 @@ class PageHandler:
     def url(self):
         return urlunsplit(("https", self.domain, self.path, "", ""))
 
-    def get(self, path="", query="", soup=True):
+    def get(self, path="", query="", soup=True, max_tries=5):
         r = requests.get(self.relative_url(path, query), allow_redirects=True)
 
-        if r.status_code != 200:
-            raise requests.exceptions.RequestException()
+        if r.status_code >= 400 and max_tries > 0:
+            return self.get(path, query, soup, max_tries - 1)
+
+        elif r.status_code != 200:
+            raise requests.exceptions.RequestException()
 
         if soup:
             return BeautifulSoup(r.content, "html.parser")
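The retry added here is recursive: any response with a status of 400 or above triggers another attempt, up to max_tries times, before falling through to the usual exception path. A minimal standalone sketch of the same pattern, assuming a plain URL rather than the class's relative_url helper (the function name is illustrative):

    import requests

    def get_with_retry(url, max_tries=5):
        # Mirror of the commit's pattern: retry on any 4xx/5xx response
        # until max_tries is exhausted, then fall through to the error path.
        r = requests.get(url, allow_redirects=True)
        if r.status_code >= 400 and max_tries > 0:
            return get_with_retry(url, max_tries - 1)
        elif r.status_code != 200:
            raise requests.exceptions.RequestException()
        return r

Note that the retries fire back-to-back with no delay; an iterative loop with a small time.sleep() backoff would be gentler on the server, but the recursive form keeps the change to the existing method minimal.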
@@ -114,7 +117,7 @@ class Category(PageHandler):
         return self._images
 
     def download(self, dir_path):
-        dir_path = os.path.join(dir_path, self.name)
+        dir_path = os.path.join(dir_path, self.name.replace("/", "-"))
 
         os.makedirs(dir_path, exist_ok=True)
 
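The second fix sanitizes the category name before it becomes a directory: a name containing a slash would otherwise be split into nested directories by os.path.join and os.makedirs. A quick illustration (the category name is hypothetical):

    import os

    name = "Cars/Motorcycles"  # hypothetical category name containing a slash
    nested = os.path.join("walls", name)                   # walls/Cars/Motorcycles
    flat = os.path.join("walls", name.replace("/", "-"))   # walls/Cars-Motorcycles
    print(nested, flat)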