av一区二区在线观看_亚洲男人的天堂网站_日韩亚洲视频_在线成人免费_欧美日韩精品免费观看视频_久草视

您的位置:首頁技術文章
文章詳情頁

python 爬取影視網站下載鏈接

瀏覽:75日期:2022-06-18 09:31:11
目錄項目地址:運行效果導入模塊爬蟲主代碼完整代碼項目地址:

https://github.com/GriffinLewis2001/Python_movie_links_scraper

運行效果

python 爬取影視網站下載鏈接

python 爬取影視網站下載鏈接

導入模塊

import requests,refrom requests.cookies import RequestsCookieJarfrom fake_useragent import UserAgentimport os,pickle,threading,timeimport concurrent.futuresfrom goto import with_goto爬蟲主代碼

def get_content_url_name(url): send_headers = { 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Connection': 'keep-alive', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.8'} cookie_jar = RequestsCookieJar() cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co') response=requests.get(url,send_headers,cookies=cookie_jar) response.encoding=’utf-8’ content=response.text reg=re.compile(r’<a rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' ’) url_name_list=reg.findall(content) return url_name_listdef get_content(url): send_headers = { 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Connection': 'keep-alive', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.8'} cookie_jar = RequestsCookieJar() cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co') response=requests.get(url,send_headers,cookies=cookie_jar) response.encoding=’utf-8’ return response.textdef search_durl(url): content=get_content(url) reg=re.compile(r'{’x64x65x63x72x69x70x74x50x61x72x61x6d’:’(.*?)’}') index=reg.findall(content)[0] download_url=url[:-5]+r’/downloadList?decriptParam=’+index content=get_content(download_url) reg1=re.compile(r’title='.*?' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' ’) download_list=reg1.findall(content) return download_listdef get_page(url): send_headers = { 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Connection': 'keep-alive', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.8'} cookie_jar = RequestsCookieJar() cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co') response=requests.get(url,send_headers,cookies=cookie_jar) response.encoding=’utf-8’ content=response.text reg=re.compile(r’<a target='_blank' href='http://m.4tl426be.cn/bcjs/(.*?)' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' >(.*?)</a>’) url_name_list=reg.findall(content) return url_name_list@with_gotodef main(): print('=========================================================') name=input('請輸入劇名(輸入quit退出):') if name == 'quit':exit() url='http://www.yikedy.co/search?query='+name dlist=get_page(url) print('n') if(dlist):num=0count=0for i in dlist: if (name in i[1]) :print(f'{num} {i[1]}')num+=1 elif num==0 and count==len(dlist)-1:goto .end count+=1dest=int(input('nn請輸入劇的編號(輸100跳過此次搜尋):'))if dest == 100: goto .endx=0print('n以下為下載鏈接:n')for i in dlist: if (name in i[1]):if(x==dest): for durl in search_durl(i[0]):print(f'{durl}n') print('n') breakx+=1 else:label .endprint('沒找到或不想看n')完整代碼

import requests,refrom requests.cookies import RequestsCookieJarfrom fake_useragent import UserAgentimport os,pickle,threading,timeimport concurrent.futuresfrom goto import with_gotodef get_content_url_name(url): send_headers = { 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Connection': 'keep-alive', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.8'} cookie_jar = RequestsCookieJar() cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co') response=requests.get(url,send_headers,cookies=cookie_jar) response.encoding=’utf-8’ content=response.text reg=re.compile(r’<a href='http://m.4tl426be.cn/bcjs/(.*?)' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' ’) url_name_list=reg.findall(content) return url_name_listdef get_content(url): send_headers = { 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Connection': 'keep-alive', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.8'} cookie_jar = RequestsCookieJar() cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co') response=requests.get(url,send_headers,cookies=cookie_jar) response.encoding=’utf-8’ return response.textdef search_durl(url): content=get_content(url) reg=re.compile(r'{’x64x65x63x72x69x70x74x50x61x72x61x6d’:’(.*?)’}') index=reg.findall(content)[0] download_url=url[:-5]+r’/downloadList?decriptParam=’+index content=get_content(download_url) reg1=re.compile(r’title='.*?' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' ’) download_list=reg1.findall(content) return download_listdef get_page(url): send_headers = { 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Connection': 'keep-alive', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.8'} cookie_jar = RequestsCookieJar() cookie_jar.set('mttp', '9740fe449238', domain='www.yikedy.co') response=requests.get(url,send_headers,cookies=cookie_jar) response.encoding=’utf-8’ content=response.text reg=re.compile(r’<a target='_blank' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' rel='external nofollow' >(.*?)</a>’) url_name_list=reg.findall(content) return url_name_list@with_gotodef main(): print('=========================================================') name=input('請輸入劇名(輸入quit退出):') if name == 'quit':exit() url='http://www.xxx.com/search?query='+name dlist=get_page(url) print('n') if(dlist):num=0count=0for i in dlist: if (name in i[1]) :print(f'{num} {i[1]}')num+=1 elif num==0 and count==len(dlist)-1:goto .end count+=1dest=int(input('nn請輸入劇的編號(輸100跳過此次搜尋):'))if dest == 100: goto .endx=0print('n以下為下載鏈接:n')for i in dlist: if (name in i[1]):if(x==dest): for durl in search_durl(i[0]):print(f'{durl}n') print('n') breakx+=1 else:label .endprint('沒找到或不想看n')print('本軟件由CLY.所有nn')while(True): main()

以上就是python 爬取影視網站下載鏈接的詳細內容,更多關于python 爬取下載鏈接的資料請關注好吧啦網其它相關文章!

標簽: Python 編程
相關文章:
主站蜘蛛池模板: 日韩免费小视频 | 国产天堂网 | 国产一级特黄 | 国产免费小视频 | 成人爽a毛片一区二区免费 亚洲午夜在线观看 | 国产精品三 | 夜夜嗨av一区二区三区 | 蜜桃一区二区 | 日韩免费大片 | 成人国产综合 | 亚洲精品日韩精品 | 国产伦精品一区二区三区88av | 成年人的免费视频 | 亚洲成人精品 | 国产精品免费一区 | 双性呜呜宫交受不住了h | 五月婷婷综合激情 | 丁香在线视频 | 日韩精品久久 | 国产这里只有精品 | 国产精品视频久久 | 久久久成人免费视频 | 久草热视频 | 久草综合在线 | 久久久久久免费毛片精品 | 99热国产 | 在线理论片 | 九九精品在线观看 | 国产香蕉在线观看 | 日韩精品在线观看视频 | 久久久成人免费视频 | 一级黄色片视频 | 久久九九免费视频 | 一区二区三区av | 日本欧美视频 | 国产伦精品一区二区三区视频我 | 在线看成人片 | 欧美另类激情 | 夜夜夜夜操| 韩国精品一区二区 | 在线播放h |