Added retry; should work now
parent 8541eb65aa
commit 572426f3e3
README.md

```diff
@@ -1,3 +1,9 @@
 # 4kwallpapers-scraper
 
 A Python script to scrape wallpapers from https://4kwallpapers.com/.
+
+## Usage
+The program takes a single command line argument, namely the root folder
+where you want all the pictures to go. It'll create sub-folders for all the
+categories, and download all pictures in one go. Yes, this can take a while
+(15m in my case, with a fast internet connection).
```
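Concretely, the usage described above comes down to a single invocation. The destination path below is only an example; the entry-point name is taken from the scraper.py diff in this same commit:

```sh
# Scrape every category into sub-folders of the given root folder.
python scraper.py ~/Pictures/wallpapers
```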
scraper.py (11 changed lines)
```diff
@@ -19,11 +19,14 @@ class PageHandler:
     def url(self):
         return urlunsplit(("https", self.domain, self.path, "", ""))
 
-    def get(self, path="", query="", soup=True):
+    def get(self, path="", query="", soup=True, max_tries=5):
         r = requests.get(self.relative_url(path, query), allow_redirects=True)
 
-        if r.status_code != 200:
-            raise requests.exceptions.RequestException()
+        if r.status_code >= 400 and max_tries > 0:
+            return self.get(path, query, soup, max_tries - 1)
+
+        elif r.status_code != 200:
+            raise requests.exceptions.RequestException()
 
         if soup:
             return BeautifulSoup(r.content, "html.parser")
```
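The retry added here is recursive: any response with a status of 400 or above consumes one of `max_tries` attempts by calling `get()` again, and only once the budget is used up does the `elif` branch raise. There is no pause between attempts. Below is a minimal iterative sketch of the same idea for comparison; the standalone function, its name, and the `delay` between attempts are illustrative assumptions, not code from this commit:

```python
import time

import requests
from bs4 import BeautifulSoup


def get_with_retry(url, soup=True, max_tries=5, delay=1.0):
    """Fetch `url`, retrying on HTTP errors (status >= 400).

    Iterative equivalent of the recursive retry in PageHandler.get();
    the sleep between attempts is an extra safeguard, not in the commit.
    """
    for _ in range(max_tries + 1):  # first attempt plus max_tries retries
        r = requests.get(url, allow_redirects=True)
        if r.status_code < 400:
            break  # success (redirects were already followed by requests)
        time.sleep(delay)  # brief pause before trying again
    else:
        # The loop never hit `break`: every attempt returned an error status.
        raise requests.exceptions.RequestException(
            f"{url} still failing after {max_tries} retries"
        )

    if r.status_code != 200:
        # Mirror the original behaviour: any other non-200 status is fatal.
        raise requests.exceptions.RequestException(f"unexpected status {r.status_code}")

    return BeautifulSoup(r.content, "html.parser") if soup else r
```

A flat retry loop like this (or the recursive version in the diff) treats all errors the same; a common refinement is exponential backoff, since hammering a rate-limiting server with instant retries rarely helps.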
```diff
@@ -114,7 +117,7 @@ class Category(PageHandler):
         return self._images
 
     def download(self, dir_path):
-        dir_path = os.path.join(dir_path, self.name)
+        dir_path = os.path.join(dir_path, self.name.replace("/", "-"))
 
         os.makedirs(dir_path, exist_ok=True)
```
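The second hunk sanitizes the category name before it becomes a folder: a name containing a slash would otherwise end up as a nested path, so `os.makedirs` would create two levels of directories. A tiny self-contained demonstration; the category name used here is invented for illustration:

```python
import os

name = "Black/Dark"  # hypothetical category name containing a slash

# Unsanitized, the slash silently introduces a nested sub-directory:
print(os.path.join("wallpapers", name))                    # wallpapers/Black/Dark (on POSIX)

# With the commit's fix, the name stays a single folder:
print(os.path.join("wallpapers", name.replace("/", "-")))  # wallpapers/Black-Dark
```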