Uses urllib, urllib2, and BeautifulSoup.
The script finds every matching result and, for each result, walks through all the download links for each available video format.
Run the code below and enter the name of the US TV show you want to search for:
# coding: utf-8
import urllib
import urllib2
from bs4 import BeautifulSoup
import sys

# Python 2 workaround so utf-8 and gb2312 strings can be re-encoded implicitly.
reload(sys)
sys.setdefaultencoding('utf8')

host = "http://www.meijutt.com"


def getUrlRespHtml(url, data=None):
    """Fetch a page (GET, or POST when form data is given) and return the raw HTML."""
    # Note: the Host header is just the hostname, without the scheme.
    heads = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
             'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
             'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
             'Cache-Control': 'max-age=0',
             'Host': host.replace('http://', ''),
             'Connection': 'keep-alive',
             'Referer': url,
             'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.14) Gecko/20110221 Ubuntu/10.10 (maverick) Firefox/3.6.14'}
    req = urllib2.Request(url)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    opener.addheaders = heads.items()
    if data:
        # POST the url-encoded form data (used by the search page).
        data = urllib.urlencode(data)
        respHtml = opener.open(req, data).read()
    else:
        respHtml = opener.open(req).read()
    return respHtml


def get_bt(url):
    """Print every download link on a show's detail page, grouped by video format."""
    html = getUrlRespHtml(url)
    bs_html = BeautifulSoup(html, 'html.parser')
    download_list = bs_html.find_all('div', 'down_list')
    for index, down_list in enumerate(download_list):
        # Only label the group when there is more than one video format.
        if len(download_list) > 1:
            print '<<<<< Video format %s >>>>>' % (index + 1)
        for li in down_list.find_all('li'):
            down_url = li.find('input', 'down_url')
            if down_url is not None:  # skip list items that carry no download link
                print down_url.attrs.get('value')


def get_bts(searchword):
    """Search the site for a show name and print the download links of every result."""
    data = {'searchword': searchword}
    url = host + '/search.asp'
    html = getUrlRespHtml(url, data)
    bs = BeautifulSoup(html, 'html.parser')
    cn_box2 = bs.find_all('div', 'cn_box2')
    print '\nSearch results: %s\n' % len(cn_box2)
    for index, div in enumerate(cn_box2):
        print '------ Result %s ------' % (index + 1)
        attrs = div.a.attrs
        title = attrs.get('title')
        href = attrs.get('href')
        print title, (host + href)
        get_bt(host + href)
        print '------ end ------\n'


searchword = raw_input("Enter the name of the show to search for: ")
# The site's search form expects gb2312-encoded form data.
get_bts(searchword.encode('gb2312'))
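For reference, here is a minimal, self-contained sketch of the BeautifulSoup lookups the script relies on. The HTML snippet below is made up to mirror the down_list / down_url structure the script assumes exists on meijutt.com, so it can be run offline to see how find_all('div', 'down_list') and find('input', 'down_url') pull out the link values (passing a plain string as the second argument filters by CSS class, the same as class_=...):

# coding: utf-8
from bs4 import BeautifulSoup

# Hypothetical markup mimicking the structure the scraper expects.
sample_html = """
<div class="down_list">
  <ul>
    <li><input class="down_url" value="ed2k://|file|episode01.mkv|..." /></li>
    <li><input class="down_url" value="ed2k://|file|episode02.mkv|..." /></li>
  </ul>
</div>
"""

soup = BeautifulSoup(sample_html, 'html.parser')
for down_list in soup.find_all('div', 'down_list'):
    for li in down_list.find_all('li'):
        link_input = li.find('input', 'down_url')
        if link_input is not None:
            # Each <li> is expected to hold an <input class="down_url"> whose
            # value attribute carries the download link.
            print link_input.attrs.get('value')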