Added retry; should work now
							parent
							
								
									8541eb65aa
								
							
						
					
					
						commit
						572426f3e3
					
				| 
						 | 
					@ -1,3 +1,9 @@
 | 
				
			||||||
# 4kwallpapers-scraper
 | 
					# 4kwallpapers-scraper
 | 
				
			||||||
 | 
					
 | 
				
			||||||
A Python script to scrape wallpapers from https://4kwallpapers.com/.
 | 
					A Python script to scrape wallpapers from https://4kwallpapers.com/.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Usage
 | 
				
			||||||
 | 
					The program takes a single command line argument, namely the root folder of
 | 
				
			||||||
 | 
					where you want all the pictures to go. It'll create sub-folders for all the
 | 
				
			||||||
 | 
					categories, and download all pictures in one go. Yes, this can take a while
 | 
				
			||||||
 | 
					(15m in my case, with a fast internet connection).
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										11
									
								
								scraper.py
								
								
								
								
							
							
						
						
									
										11
									
								
								scraper.py
								
								
								
								
							| 
						 | 
					@ -19,11 +19,14 @@ class PageHandler:
 | 
				
			||||||
    def url(self):
        """Build the full https URL for this page from its domain and path."""
        # Empty query and fragment components — the handler addresses plain pages.
        parts = ("https", self.domain, self.path, "", "")
        return urlunsplit(parts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get(self, path="", query="", soup=True):
 | 
					    def get(self, path="", query="", soup=True, max_tries=5):
 | 
				
			||||||
        r = requests.get(self.relative_url(path, query), allow_redirects=True)
 | 
					        r = requests.get(self.relative_url(path, query), allow_redirects=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if r.status_code != 200:
 | 
					        if r.status_code >= 400 and max_tries > 0:
 | 
				
			||||||
            raise requests.exceptions.RequestException()
 | 
					            return self.get(path, query, soup, max_tries - 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        elif r.status_code != 200:
 | 
				
			||||||
 | 
					            raise requests.exceptions.RequestError()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if soup:
 | 
					        if soup:
 | 
				
			||||||
            return BeautifulSoup(r.content, "html.parser")
 | 
					            return BeautifulSoup(r.content, "html.parser")
 | 
				
			||||||
| 
						 | 
					@ -114,7 +117,7 @@ class Category(PageHandler):
 | 
				
			||||||
        return self._images
 | 
					        return self._images
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def download(self, dir_path):
 | 
					    def download(self, dir_path):
 | 
				
			||||||
        dir_path = os.path.join(dir_path, self.name)
 | 
					        dir_path = os.path.join(dir_path, self.name.replace("/", "-"))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        os.makedirs(dir_path, exist_ok=True)
 | 
					        os.makedirs(dir_path, exist_ok=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue