1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
| import requests from bs4 import BeautifulSoup from requests.packages.urllib3.exceptions import InsecureRequestWarning import os
class chiphell:
    """Download the pictures posted in one section of the Chiphell forum.

    The crawler first walks the listing pages of forum ``fid=62`` /
    ``typeid=277`` to collect thread URLs, then opens every thread and
    saves each image that carries a ``zoomfile`` attribute under
    ``output/<thread id>/``.
    """

    # Seconds to wait on the server for any single request, so that a
    # stalled connection cannot hang the whole crawl forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, page):
        """Remember how many listing pages to crawl.

        Args:
            page: Number of listing pages to scan, starting at page 1.
        """
        self.page = page
        # Created here (not only in get_forumaddr) so get_forum_picture()
        # can be called on a fresh instance without an AttributeError.
        self.forumaddr = []
        self.cookiesdict = {}

    @staticmethod
    def _thread_id(address):
        """Return a filesystem-safe folder name for the thread at *address*.

        The value of the ``tid`` query parameter is used when the URL has
        the ``viewthread&tid=<id>`` form; otherwise the last path segment
        of the URL is used.  Every character other than letters, digits,
        ``-`` and ``_`` is replaced by ``_`` because the URL comes from a
        remote page and must not be able to inject path separators.
        """
        _, marker, tail = address.partition('viewthread&tid=')
        if marker:
            raw = tail.partition('&')[0]
        else:
            raw = address.rstrip('/').rsplit('/', 1)[-1]
        safe = ''.join(ch if ch.isalnum() or ch in '-_' else '_' for ch in raw)
        return safe or 'unknown'

    def get_forumaddr(self):
        """Collect thread URLs from the listing pages into ``self.forumaddr``.

        Cookies returned by each listing page are merged into
        ``self.cookiesdict`` and sent with the following requests.  Network
        errors raised by ``requests`` propagate to the caller.
        """
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
        self.forumaddr = []
        self.cookiesdict = {}
        # Stored for callers that read it; not passed to any request made
        # by this class (presumably a local debugging proxy - confirm).
        self.proxies = {'http': '127.0.0.1:8888', 'https': '127.0.0.1:8888'}
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
            "referer": "https://www.chiphell.com/forum-62-1.html",
        }
        self.address = "https://www.chiphell.com/forum.php"
        self.params = {
            "mod": "forumdisplay",
            "fid": "62",
            "filter": "typeid",
            "orderby": "lastpost",
            "typeid": "277",
            "page": "1",
        }
        for page_no in range(1, self.page + 1):
            self.params['page'] = page_no
            resdata = requests.get(
                self.address,
                headers=self.headers,
                params=self.params,
                cookies=self.cookiesdict,
                timeout=self.REQUEST_TIMEOUT,
            )
            self.cookiesdict = dict(
                self.cookiesdict,
                **requests.utils.dict_from_cookiejar(resdata.cookies)
            )
            listing = BeautifulSoup(resdata.text, 'html.parser').find('ul', class_='ml')
            if listing is None:
                # find() returns None when the element is absent; skip the
                # page instead of aborting the whole crawl.
                print('no thread list found on page', page_no)
                continue
            for item in listing.find_all('li'):
                title = item.find('h3', class_='xw0')
                link = title.find('a') if title is not None else None
                href = link.get('href') if link is not None else None
                if href:
                    self.forumaddr.append('https://www.chiphell.com/' + href)

    def get_forum_picture(self, address):
        """Download every zoomable picture of one thread.

        Files are written to ``output/<thread id>/<file name>``, where the
        file name is the part of the image URL after its last ``/``.  A
        picture that cannot be fetched or written is reported and skipped;
        a failure to fetch the thread page itself propagates.

        Args:
            address: Absolute URL of the thread page.
        """
        print(address)
        dic = self._thread_id(address)
        # os.path.join keeps the layout portable; the former hard-coded
        # '.\\output\\...' strings only formed a real path on Windows.
        target_dir = os.path.join('output', dic)
        os.makedirs(target_dir, exist_ok=True)
        self.get_picture_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
            "referer": "https://www.chiphell.com/forum.php?mod=forumdisplay&fid=62&filter=typeid&typeid=277",
        }
        self.get_picture_address = address
        resdata = requests.get(
            self.get_picture_address,
            headers=self.get_picture_headers,
            cookies=self.cookiesdict,
            timeout=self.REQUEST_TIMEOUT,
        )
        post = BeautifulSoup(resdata.text, 'html.parser').find('div', class_='t_fsz')
        if post is None:
            print('no post body found in', address)
            return
        for img in post.find_all('img', class_='zoom'):
            # .get() yields None for a missing attribute instead of raising.
            url = img.get('zoomfile')
            if not url:
                continue
            print(url)
            filename = url[url.rfind('/') + 1:]
            print(filename)
            try:
                pic = requests.get(
                    url,
                    headers=self.get_picture_headers,
                    cookies=self.cookiesdict,
                    timeout=self.REQUEST_TIMEOUT,
                )
                with open(os.path.join(target_dir, filename), 'wb') as file:
                    file.write(pic.content)
            except (requests.RequestException, OSError) as err:
                # Best effort: one broken picture must not stop the rest,
                # but Ctrl-C and programming errors are no longer swallowed.
                print('skipped', url, err)

    def Start(self):
        """Run the whole crawl: collect thread URLs, then fetch their pictures."""
        self.get_forumaddr()
        for thread_url in self.forumaddr:
            self.get_forum_picture(thread_url)
# Script entry: scan the first 10 listing pages and download the pictures
# of every thread found there.
a = chiphell(10)
a.Start()
|