mirror of https://git.acwing.com/iduoduo/orange
This commit is contained in:
parent
e95b9f85bf
commit
253401c53e
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"数组":"<li&&</p>",
|
||||
"链接": "href=\"&&\"",
|
||||
"标题": "title=\"&&\"",
|
||||
"图片": "data-original=\"&&\"",
|
||||
"简介": "简介:&&</div>",
|
||||
"分类": "电影$20#剧集$21#动漫$22#综艺$23#短剧$24",
|
||||
"副标题": "class=\"item-status text-overflow\">&&<",
|
||||
"分类url": "https://www.7.movie/vodtype/{cateId}.html",
|
||||
"搜索数组": "<a&&</a>",
|
||||
"搜索二次截取": "<ul class=\"ewave-page&&href\"&&\">",
|
||||
"搜索": "https://www.7.movie/vodsearch/-------------.html?wd={wd}",
|
||||
"搜索图片": "class=\"pic\">*data-original=\"&&\"",
|
||||
"搜索链接": "class=\"btn-box\">\"&&\"</",
|
||||
"播放数组": "class=\"episode-box scrollbar\"&&</ul>",
|
||||
"线路数组": "<ul class=\"swiper-wrapper\"&&</ul>",
|
||||
"线路标题": ">&&<em></em>"
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"请求头": "手机",
|
||||
"简介": "剧情介绍:+module-info-introduction-content\">&&</div>",
|
||||
"分类url": "https://svip1.fun/index.php/vod/show/area/{area}/class/{class}/id/{cateId}/page/{catePg}/year/{year}.html",
|
||||
"分类": "电影$20#电视剧$21#短剧$24#动漫$22#综艺$23"
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"线路数组": "<a data-toggle=\"tab\"&&/a>",
|
||||
"线路标题": ">&&<",
|
||||
"分类url": "http://dyxz.tv/list/{cateId}_{catePg}.html",
|
||||
"分类": "电影$1#电视剧$2#动漫$3#综艺$4"
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"分类url": "https://www.iysdq.cc/vodshow/{cateId}-{area}-------{catePg}---.html",
|
||||
"分类": "电影$1#电视剧$2#综艺$3#动漫$4#短剧$5"
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"请求头": "手机",
|
||||
"简介": "剧情介绍:+description\">&&</div>",
|
||||
"数组": "module-item\">&&module-item-text",
|
||||
"图片": "data-src=\"&&\"",
|
||||
"副标题": "video-class\">&&</span>",
|
||||
"分类url": "https://jxhwsl.com/index.php/vod/show/area/{area}/class/{class}/id/{cateId}/page/{catePg}/year/{year}.html",
|
||||
"分类": "电影$61#电视剧$79#短剧$99#动漫$93#综艺$88"
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"分类url": "https://www.bestpipe.cn/vodshow/{cateId}-{area}-------{catePg}---.html",
|
||||
"分类": "电影$20#剧集$21#短剧$24#综艺$23#动漫$22"
|
||||
}
|
2575
config.bin
2575
config.bin
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,30 @@
|
|||
{
|
||||
"SiteUrl": "https://www.kuafuzy.com",
|
||||
"Classes": [
|
||||
{
|
||||
"type_name": "电影",
|
||||
"type_id": "1"
|
||||
},
|
||||
{
|
||||
"type_name": "剧集",
|
||||
"type_id": "2"
|
||||
},
|
||||
{
|
||||
"type_name": "4K电影",
|
||||
"type_id": "3"
|
||||
},
|
||||
{
|
||||
"type_name": "4K剧集",
|
||||
"type_id": "4"
|
||||
},
|
||||
{
|
||||
"type_name": "动漫",
|
||||
"type_id": "5"
|
||||
},
|
||||
{
|
||||
"type_name": "短剧",
|
||||
"type_id": "6"
|
||||
}
|
||||
],
|
||||
"Cookie": "bbs_token=zNQpYs_2BmC2e_2FcUM_2BmuihZ33Jswh_2Fj7sPtelqcw_3D_3D; bbs_sid=lgs96gh42gevj7lsg5f8o3kjsi"
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import sys
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
class Spider(Spider):
    """Spider for the CMS "APPV2" JSON API.

    The site config's "ext" field supplies the API host, e.g.:
        {"key": "py_appV2", "type": 3, "api": "./py/APPV2.py",
         "ext": "http://cmsyt.lyyytv.cn"}
    """

    # Mobile client UA accepted by all endpoints.
    headers = {
        'User-Agent': 'okhttp/4.12.0',
    }

    def init(self, extend=""):
        # "ext" from the site config is the API host.
        self.host = extend

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    def homeContent(self, filter):
        """Build the category list and per-category filter groups from /app/nav.

        Fix: "//api.php" -> "/api.php" for consistency with every other endpoint.
        """
        data = self.fetch(f"{self.host}/api.php/app/nav?token=", headers=self.headers).json()
        keys = ["class", "area", "lang", "year", "letter", "by", "sort"]
        filters = {}
        classes = []
        for item in data['list']:
            jsontype_extend = item["type_extend"]
            classes.append({"type_name": item["type_name"], "type_id": item["type_id"]})
            # Only categories with at least one non-empty known key get a filter group.
            if any(k in jsontype_extend and jsontype_extend[k].strip() != "" for k in keys):
                groups = []
                for dkey in jsontype_extend:
                    if dkey in keys and jsontype_extend[dkey].strip() != "":
                        value_array = [{"n": v.strip(), "v": v.strip()}
                                       for v in jsontype_extend[dkey].split(",")
                                       if v.strip() != ""]
                        groups.append({"key": dkey, "name": dkey, "value": value_array})
                filters[str(item["type_id"])] = groups
        return {"class": classes, "filters": filters}

    def homeVideoContent(self):
        """Flatten the home recommendation sections into one video list."""
        data = self.fetch(f"{self.host}/api.php/app/index_video?token=", headers=self.headers).json()
        videos = []
        for item in data['list']:
            videos.extend(item['vlist'])
        return {'list': videos}

    def categoryContent(self, tid, pg, filter, extend):
        """Category listing; *extend* carries the optional filter selections."""
        params = {
            'tid': tid,
            'class': extend.get('class', ''),
            'area': extend.get('area', ''),
            'lang': extend.get('lang', ''),
            'year': extend.get('year', ''),
            'limit': '18',
            'pg': pg,
        }
        data = self.fetch(f"{self.host}/api.php/app/video", params=params, headers=self.headers).json()
        return data

    def detailContent(self, ids):
        """Detail payload for a single vod id (only ids[0] is used)."""
        data = self.fetch(f"{self.host}/api.php/app/video_detail?id={ids[0]}", headers=self.headers).json()
        return {'list': [data['data']]}

    def searchContent(self, key, quick, pg="1"):
        """Keyword search; drops the 'type' field the player UI does not expect."""
        data = self.fetch(f"{self.host}/api.php/app/search?text={key}&pg={pg}", headers=self.headers).json()
        videos = data['list']
        for item in videos:
            item.pop('type', None)
        return {'list': videos, 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        # Delegate to the app-side parser (jx=1, parse=1) with the raw URL.
        return {'jx': 1, 'playUrl': '', 'parse': 1, 'url': id, 'header': self.headers}

    def localProxy(self, param):
        pass
|
||||
|
||||
|
|
@ -0,0 +1,581 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import requests
|
||||
import re
|
||||
import json
|
||||
import traceback
|
||||
import sys
|
||||
|
||||
sys.path.append('../../')
|
||||
# Import the real runtime base class; fall back to a minimal stub so the
# module can also be loaded for local testing outside the TV-box runtime.
try:
    from base.spider import Spider
except ImportError:
    class Spider:
        def init(self, extend=""):
            pass
|
||||
|
||||
class Spider(Spider):
|
||||
def __init__(self):
    # Root of the 河马/快看 short-drama site; all paths below are relative to it.
    self.siteUrl = "https://www.kuaikaw.cn"
    self.nextData = None  # cache slot for the page's __NEXT_DATA__ JSON
    # Manual category map: display name -> /browse/<id> path segment.
    self.cateManual = {
        "甜宠": "462",
        "古装仙侠": "1102",
        "现代言情": "1145",
        "青春": "1170",
        "豪门恩怨": "585",
        "逆袭": "417-464",
        "重生": "439-465",
        "系统": "1159",
        "总裁": "1147",
        "职场商战": "943"
    }
|
||||
|
||||
def getName(self):
|
||||
# 返回爬虫名称
|
||||
return "河马短剧"
|
||||
|
||||
def init(self, extend=""):
|
||||
return
|
||||
|
||||
def fetch(self, url, headers=None):
    """GET *url* with browser-like defaults (10 s timeout, redirects on).

    Returns the Response on success, or None on any request error.
    """
    if headers is None:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            "Referer": self.siteUrl,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
        }
    try:
        resp = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
        resp.raise_for_status()
    except Exception as e:
        print(f"请求异常: {url}, 错误: {str(e)}")
        return None
    return resp
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
# 检查是否为视频格式
|
||||
video_formats = ['.mp4', '.mkv', '.avi', '.wmv', '.m3u8', '.flv', '.rmvb']
|
||||
for format in video_formats:
|
||||
if format in url.lower():
|
||||
return True
|
||||
return False
|
||||
|
||||
def manualVideoCheck(self):
|
||||
# 不需要手动检查
|
||||
return False
|
||||
|
||||
def homeContent(self, filter):
|
||||
"""获取首页分类及筛选"""
|
||||
result = {}
|
||||
# 分类列表,使用已初始化的cateManual
|
||||
classes = []
|
||||
for k in self.cateManual:
|
||||
classes.append({
|
||||
'type_name': k,
|
||||
'type_id': self.cateManual[k]
|
||||
})
|
||||
result['class'] = classes
|
||||
# 获取首页推荐视频
|
||||
try:
|
||||
result['list'] = self.homeVideoContent()['list']
|
||||
except:
|
||||
result['list'] = []
|
||||
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
    """Home recommendations: banner carousel + SEO column books from __NEXT_DATA__."""
    videos = []
    try:
        response = self.fetch(self.siteUrl)
        html_content = response.text
        # The site is a Next.js app; all page data sits in the __NEXT_DATA__ blob.
        next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
        next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
        if next_data_match:
            next_data_json = json.loads(next_data_match.group(1))
            page_props = next_data_json.get("props", {}).get("pageProps", {})
            # Carousel entries are the primary recommendations.
            if "bannerList" in page_props and isinstance(page_props["bannerList"], list):
                banner_list = page_props["bannerList"]
                for banner in banner_list:
                    book_id = banner.get("bookId", "")
                    book_name = banner.get("bookName", "")
                    cover_url = banner.get("coverWap", banner.get("wapUrl", ""))
                    status = banner.get("statusDesc", "")
                    total_chapters = banner.get("totalChapterNum", "")
                    if book_id and book_name:
                        videos.append({
                            "vod_id": f"/drama/{book_id}",
                            "vod_name": book_name,
                            "vod_pic": cover_url,
                            "vod_remarks": f"{status} {total_chapters}集" if total_chapters else status
                        })
            # SEO columns carry additional recommended books.
            if "seoColumnVos" in page_props and isinstance(page_props["seoColumnVos"], list):
                for column in page_props["seoColumnVos"]:
                    book_infos = column.get("bookInfos", [])
                    for book in book_infos:
                        book_id = book.get("bookId", "")
                        book_name = book.get("bookName", "")
                        cover_url = book.get("coverWap", "")
                        status = book.get("statusDesc", "")
                        total_chapters = book.get("totalChapterNum", "")
                        if book_id and book_name:
                            videos.append({
                                "vod_id": f"/drama/{book_id}",
                                "vod_name": book_name,
                                "vod_pic": cover_url,
                                "vod_remarks": f"{status} {total_chapters}集" if total_chapters else status
                            })
        # NOTE(review): duplicates across banner/SEO lists are intentionally kept
        # (a dedup pass existed but was commented out by the author).
    except Exception as e:
        print(f"获取首页推荐内容出错: {e}")

    result = {
        "list": videos
    }
    return result
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
    """Category listing via /browse/<tid>/<pg>, parsed from __NEXT_DATA__."""
    result = {}
    videos = []
    url = f"{self.siteUrl}/browse/{tid}/{pg}"
    response = self.fetch(url)
    html_content = response.text
    next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
    next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
    if next_data_match:
        next_data_json = json.loads(next_data_match.group(1))
        page_props = next_data_json.get("props", {}).get("pageProps", {})
        current_page = page_props.get("page", 1)
        total_pages = page_props.get("pages", 1)
        book_list = page_props.get("bookList", [])
        for book in book_list:
            book_id = book.get("bookId", "")
            book_name = book.get("bookName", "")
            cover_url = book.get("coverWap", "")
            status_desc = book.get("statusDesc", "")
            total_chapters = book.get("totalChapterNum", "")
            if book_id and book_name:
                videos.append({
                    "vod_id": f"/drama/{book_id}",
                    "vod_name": book_name,
                    "vod_pic": cover_url,
                    "vod_remarks": f"{status_desc} {total_chapters}集" if total_chapters else status_desc
                })
        # NOTE(review): "total" is an estimate (pages * current page size), not a
        # server-reported value. When __NEXT_DATA__ is absent, the empty {} is
        # returned — presumably intentional; confirm against the runtime's contract.
        result = {
            "list": videos,
            "page": int(current_page),
            "pagecount": total_pages,
            "limit": len(videos),
            "total": total_pages * len(videos) if videos else 0
        }
    return result
|
||||
|
||||
def switch(self, key, pg):
    """Search helper: fetch result page *pg*, then crawl every remaining page
    so one call returns the full result set."""
    search_results = []
    url = f"{self.siteUrl}/search?searchValue={key}&page={pg}"
    response = self.fetch(url)
    html_content = response.text
    next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
    next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
    if next_data_match:
        next_data_json = json.loads(next_data_match.group(1))
        page_props = next_data_json.get("props", {}).get("pageProps", {})
        total_pages = page_props.get("pages", 1)
        all_book_list = []
        # First page's books.
        book_list = page_props.get("bookList", [])
        all_book_list.extend(book_list)
        # Remaining pages (quick mode would stop at page 1 per the author's note).
        if total_pages > 1:
            for page in range(2, total_pages + 1):
                next_page_url = f"{self.siteUrl}/search?searchValue={key}&page={page}"
                next_page_response = self.fetch(next_page_url)
                next_page_html = next_page_response.text
                next_page_match = re.search(next_data_pattern, next_page_html, re.DOTALL)
                if next_page_match:
                    next_page_json = json.loads(next_page_match.group(1))
                    next_page_props = next_page_json.get("props", {}).get("pageProps", {})
                    next_page_books = next_page_props.get("bookList", [])
                    all_book_list.extend(next_page_books)
        # Convert to the unified search-result shape.
        for book in all_book_list:
            book_id = book.get("bookId", "")
            book_name = book.get("bookName", "")
            cover_url = book.get("coverWap", "")
            total_chapters = book.get("totalChapterNum", "0")
            status_desc = book.get("statusDesc", "")
            vod = {
                "vod_id": f"/drama/{book_id}",
                "vod_name": book_name,
                "vod_pic": cover_url,
                "vod_remarks": f"{status_desc} {total_chapters}集"
            }
            search_results.append(vod)
    result = {
        "list": search_results,
        "page": pg
    }
    return result
|
||||
|
||||
def searchContent(self, key, quick, pg=1):
|
||||
result = self.switch(key, pg=pg)
|
||||
result['page'] = pg
|
||||
return result
|
||||
|
||||
def searchContentPage(self, key, quick, pg=1):
|
||||
return self.searchContent(key, quick, pg)
|
||||
|
||||
def detailContent(self, ids):
    """Detail page: book metadata plus an episode list with resolved MP4 links."""
    vod_id = ids[0]
    episode_id = None
    chapter_id = None

    # Normalise incoming ids to the "/drama/<id>" form.
    if not vod_id.startswith('/drama/'):
        if vod_id.startswith('/episode/'):
            episode_info = vod_id.replace('/episode/', '').split('/')
            if len(episode_info) >= 2:
                episode_id = episode_info[0]
                chapter_id = episode_info[1]
                vod_id = f'/drama/{episode_id}'
        else:
            vod_id = '/drama/' + vod_id

    drama_url = self.siteUrl + vod_id
    print(f"请求URL: {drama_url}")

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        "Referer": self.siteUrl,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
    }

    rsp = self.fetch(drama_url, headers=headers)
    if not rsp or rsp.status_code != 200:
        print(f"请求失败,状态码: {getattr(rsp, 'status_code', 'N/A')}")
        return {}

    html = rsp.text
    next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)

    if not next_data_match:
        print("未找到NEXT_DATA内容")
        return {}

    try:
        next_data = json.loads(next_data_match.group(1))
        page_props = next_data.get("props", {}).get("pageProps", {})
        print(f"找到页面属性,包含 {len(page_props.keys())} 个键")

        book_info = page_props.get("bookInfoVo", {})
        chapter_list = page_props.get("chapterList", [])

        title = book_info.get("title", "")
        sub_title = f"{book_info.get('totalChapterNum', '')}集"

        categories = []
        for category in book_info.get("categoryList", []):
            categories.append(category.get("name", ""))

        vod_content = book_info.get("introduction", "")

        vod = {
            "vod_id": vod_id,
            "vod_name": title,
            "vod_pic": book_info.get("coverWap", ""),
            "type_name": ",".join(categories),
            "vod_year": "",
            "vod_area": book_info.get("countryName", ""),
            "vod_remarks": sub_title,
            "vod_actor": ", ".join([p.get("name", "") for p in book_info.get("performerList", [])]),
            "vod_director": "",
            "vod_content": vod_content
        }

        # Episode URLs are resolved in three tiers: direct per-chapter MP4,
        # MP4 derived from a template link, or a deferred
        # "<drama>$<chapter>$<name>" token resolved later by playerContent.
        play_url_list = []
        episodes = []

        if chapter_list:
            print(f"找到 {len(chapter_list)} 个章节")

            mp4_template = None
            first_mp4_chapter_id = None

            # Probe the first episode's play page for an MP4 usable as a template.
            if chapter_list and len(chapter_list) > 0:
                first_chapter = chapter_list[0]
                first_chapter_id = first_chapter.get("chapterId", "")
                drama_id_clean = vod_id.replace('/drama/', '')

                if first_chapter_id and drama_id_clean:
                    first_episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{first_chapter_id}"
                    print(f"请求第一集播放页: {first_episode_url}")

                    first_rsp = self.fetch(first_episode_url, headers=headers)
                    if first_rsp and first_rsp.status_code == 200:
                        first_html = first_rsp.text
                        mp4_pattern = r'(https?://[^"\']+\.mp4)'
                        mp4_matches = re.findall(mp4_pattern, first_html)
                        if mp4_matches:
                            mp4_template = mp4_matches[0]
                            first_mp4_chapter_id = first_chapter_id
                            print(f"找到MP4链接模板: {mp4_template}")
                            print(f"模板对应的章节ID: {first_mp4_chapter_id}")

            # Fallback template source: inline chapterVideoVo of the first 5 chapters.
            if not mp4_template:
                for chapter in chapter_list[:5]:
                    if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
                        chapter_video = chapter["chapterVideoVo"]
                        mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                        if mp4_url and ".mp4" in mp4_url:
                            mp4_template = mp4_url
                            first_mp4_chapter_id = chapter.get("chapterId", "")
                            print(f"从chapterVideoVo找到MP4链接模板: {mp4_template}")
                            print(f"模板对应的章节ID: {first_mp4_chapter_id}")
                            break

            for chapter in chapter_list:
                chapter_id = chapter.get("chapterId", "")
                chapter_name = chapter.get("chapterName", "")

                # Tier 1: chapter carries its own MP4 link.
                if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
                    chapter_video = chapter["chapterVideoVo"]
                    mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                    if mp4_url and ".mp4" in mp4_url:
                        episodes.append(f"{chapter_name}${mp4_url}")
                        continue

                # Tier 2: substitute this chapter's id into the template URL.
                if mp4_template and first_mp4_chapter_id and chapter_id:
                    if first_mp4_chapter_id in mp4_template:
                        new_mp4_url = mp4_template.replace(first_mp4_chapter_id, chapter_id)
                        episodes.append(f"{chapter_name}${new_mp4_url}")
                        continue

                # Tier 3: deferred token for playerContent to resolve later.
                if chapter_id and chapter_name:
                    url = f"{vod_id}${chapter_id}${chapter_name}"
                    episodes.append(f"{chapter_name}${url}")

        # No chapter data at all: synthesise "第N集" entries from the count.
        if not episodes and vod_id:
            total_chapters = int(book_info.get("totalChapterNum", "0"))
            if total_chapters > 0:
                print(f"尝试构造 {total_chapters} 个默认集数")

                if chapter_id and episode_id:
                    for i in range(1, total_chapters + 1):
                        chapter_name = f"第{i}集"
                        url = f"{vod_id}${chapter_id}${chapter_name}"
                        episodes.append(f"{chapter_name}${url}")
                else:
                    for i in range(1, total_chapters + 1):
                        chapter_name = f"第{i}集"
                        url = f"{vod_id}${chapter_name}"
                        episodes.append(f"{chapter_name}${url}")

        if episodes:
            play_url_list.append("#".join(episodes))

        vod['vod_play_from'] = '河马剧场'
        vod['vod_play_url'] = '$$$'.join(play_url_list)

        result = {
            'list': [vod]
        }
        return result
    except Exception as e:
        print(f"解析详情页失败: {str(e)}")
        print(traceback.format_exc())
        return {}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
    """Resolve an episode id to a direct MP4 URL (parse=0) when possible.

    *id* format: "<drama_id>$<chapter_id>[$<chapter_name>]" (built by
    detailContent), or an already-direct URL for legacy entries.
    """
    result = {}
    print(f"调用playerContent: flag={flag}, id={id}")

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        "Referer": self.siteUrl,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
    }

    parts = id.split('$')
    drama_id = None
    chapter_id = None

    if len(parts) >= 2:
        drama_id = parts[0]
        chapter_id = parts[1]
        chapter_name = parts[2] if len(parts) > 2 else "第一集"
        print(f"解析参数: drama_id={drama_id}, chapter_id={chapter_id}")
    else:
        # Legacy single-segment ids are passed through untouched.
        print(f"使用原始URL格式: {id}")
        result["parse"] = 0
        result["url"] = id
        result["header"] = json.dumps(headers)
        return result

    # Already a direct MP4 link: nothing to resolve.
    if 'http' in chapter_id and '.mp4' in chapter_id:
        print(f"已经是MP4链接: {chapter_id}")
        result["parse"] = 0
        result["url"] = chapter_id
        result["header"] = json.dumps(headers)
        return result

    drama_id_clean = drama_id.replace('/drama/', '')
    episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{chapter_id}"
    print(f"请求episode页面: {episode_url}")

    try:
        rsp = self.fetch(episode_url, headers=headers)
        if not rsp or rsp.status_code != 200:
            print(f"请求失败,状态码: {getattr(rsp, 'status_code', 'N/A')}")
            result["parse"] = 0
            result["url"] = id
            result["header"] = json.dumps(headers)
            return result

        html = rsp.text
        print(f"获取页面大小: {len(html)} 字节")

        mp4_url = None

        # Strategy 1: look the chapter up inside the page's __NEXT_DATA__ JSON.
        next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)
        if next_data_match:
            try:
                print("找到NEXT_DATA")
                next_data = json.loads(next_data_match.group(1))
                page_props = next_data.get("props", {}).get("pageProps", {})

                chapter_list = page_props.get("chapterList", [])
                print(f"找到章节列表,长度: {len(chapter_list)}")

                for chapter in chapter_list:
                    if chapter.get("chapterId") == chapter_id:
                        print(f"找到匹配的章节: {chapter.get('chapterName')}")
                        chapter_video = chapter.get("chapterVideoVo", {})
                        mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                        if mp4_url:
                            print(f"从chapterList找到MP4链接: {mp4_url}")
                        break

                # Not in the list: try the page's own chapterInfo node.
                if not mp4_url:
                    current_chapter = page_props.get("chapterInfo", {})
                    if current_chapter:
                        print("找到当前章节信息")
                        chapter_video = current_chapter.get("chapterVideoVo", {})
                        mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                        if mp4_url:
                            print(f"从chapterInfo找到MP4链接: {mp4_url}")
            except Exception as e:
                print(f"解析NEXT_DATA失败: {str(e)}")
                print(traceback.format_exc())

        # Strategy 2: scan raw HTML, preferring links containing the chapter id.
        if not mp4_url:
            mp4_pattern = r'(https?://[^"\']+\.mp4)'
            mp4_matches = re.findall(mp4_pattern, html)
            if mp4_matches:
                matched_mp4 = False
                for url in mp4_matches:
                    if chapter_id in url:
                        mp4_url = url
                        matched_mp4 = True
                        print(f"从HTML直接提取章节MP4链接: {mp4_url}")
                        break

                if not matched_mp4 and mp4_matches:
                    mp4_url = mp4_matches[0]
                    print(f"从HTML直接提取MP4链接: {mp4_url}")

        if mp4_url and ".mp4" in mp4_url:
            print(f"最终找到的MP4链接: {mp4_url}")
            result["parse"] = 0
            result["url"] = mp4_url
            result["header"] = json.dumps(headers)
            return result
        else:
            # Last resort: any MP4 anywhere in the page.
            print(f"未找到有效的MP4链接,尝试再次解析页面内容")
            all_mp4_pattern = r'(https?://[^"\']+\.mp4)'
            all_mp4_matches = re.findall(all_mp4_pattern, html)
            if all_mp4_matches:
                mp4_url = all_mp4_matches[0]
                print(f"从HTML广泛搜索找到MP4链接: {mp4_url}")
                result["parse"] = 0
                result["url"] = mp4_url
                result["header"] = json.dumps(headers)
                return result

        # Fall back to handing the episode page itself to the player.
        print(f"未找到视频链接,返回原episode URL: {episode_url}")
        result["parse"] = 0
        result["url"] = episode_url
        result["header"] = json.dumps(headers)
        return result
    except Exception as e:
        print(f"请求或解析失败: {str(e)}")
        print(traceback.format_exc())
        result["parse"] = 0
        result["url"] = id
        result["header"] = json.dumps(headers)
        return result
|
||||
|
||||
def localProxy(self, param):
|
||||
# 本地代理处理,此处简单返回传入的参数
|
||||
return [200, "video/MP2T", {}, param]
|
||||
|
||||
def destroy(self):
|
||||
# 资源回收
|
||||
pass
|
|
@ -0,0 +1,768 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from base64 import b64decode, b64encode
|
||||
from urllib.parse import parse_qs
|
||||
import requests
|
||||
from pyquery import PyQuery as pq
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
    # Warm up douyin: a HEAD request yields the ttwid cookie later API calls need.
    tid = 'douyin'
    headers = self.gethr(0, tid)
    response = requests.head(self.hosts[tid], headers=headers)
    ttwid = response.cookies.get('ttwid')
    headers.update({
        'authority': self.hosts[tid].split('//')[-1],
        'cookie': f'ttwid={ttwid}' if ttwid else ''
    })
    self.dyheaders = headers
    pass
|
||||
|
||||
def getName(self):
    # Intentionally a no-op for this spider.
    pass
|
||||
|
||||
def isVideoFormat(self, url):
    # Intentionally a no-op for this spider.
    pass
|
||||
|
||||
def manualVideoCheck(self):
    # Intentionally a no-op for this spider.
    pass
|
||||
|
||||
def destroy(self):
    # Nothing to release.
    pass
|
||||
|
||||
# Two request identities: [0] desktop Chrome, [1] Dart HTTP client.
headers = [
    {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0"
    },
    {
        "User-Agent": "Dart/3.4 (dart:io)"
    }
]

# Fallback URL (presumably used when a platform request fails — verify usage).
excepturl = 'https://www.baidu.com'

# Site/API roots per platform; list entries hold [primary, secondary] hosts.
hosts = {
    "huya": ["https://www.huya.com", "https://mp.huya.com"],
    "douyin": "https://live.douyin.com",
    "douyu": "https://www.douyu.com",
    "wangyi": "https://cc.163.com",
    "bili": ["https://api.live.bilibili.com", "https://api.bilibili.com"]
}

# Referer values expected by each platform's endpoints.
referers = {
    "huya": "https://live.cdn.huya.com",
    "douyin": "https://live.douyin.com",
    "douyu": "https://m.douyu.com",
    "bili": "https://live.bilibili.com"
}

# Headers handed to the player for each platform's stream URLs.
playheaders = {
    "wangyi": {
        "User-Agent": "ExoPlayer",
        "Connection": "Keep-Alive",
        "Icy-MetaData": "1"
    },
    "bili": {
        'Accept': '*/*',
        'Icy-MetaData': '1',
        'referer': referers['bili'],
        'user-agent': headers[0]['User-Agent']
    },
    'douyin': {
        'User-Agent': 'libmpv',
        'Icy-MetaData': '1'
    },
    'huya': {
        'User-Agent': 'ExoPlayer',
        'Connection': 'Keep-Alive',
        'Icy-MetaData': '1'
    },
    'douyu': {
        'User-Agent': 'libmpv',
        'Icy-MetaData': '1'
    }
}
|
||||
|
||||
def process_bili(self):
    # Fetch bilibili's live area list -> ('bili', filter-spec), or ('bili', None) on error.
    try:
        self.blfdata = self.fetch(
            f'{self.hosts["bili"][0]}/room/v1/Area/getList?need_entrance=1&parent_id=0',
            headers=self.gethr(0, 'bili')
        ).json()
        return ('bili', [{'key': 'cate', 'name': '分类',
                          'value': [{'n': i['name'], 'v': str(i['id'])}
                                    for i in self.blfdata['data']]}])
    except Exception as e:
        print(f"bili处理错误: {e}")
        return 'bili', None
|
||||
|
||||
def process_douyin(self):
    # Scrape douyin's category JSON out of an inline <script> blob.
    try:
        data = self.getpq(self.hosts['douyin'], headers=self.dyheaders)('script')
        for i in data.items():
            if 'categoryData' in i.text():
                content = i.text()
                # Crude extraction: everything between the first '{' and the
                # last '}', with embedded escaped quotes unescaped.
                start = content.find('{')
                end = content.rfind('}') + 1
                if start != -1 and end != -1:
                    json_str = content[start:end]
                    json_str = json_str.replace('\\"', '"')
                    try:
                        self.dyifdata = json.loads(json_str)
                        return ('douyin', [{'key': 'cate', 'name': '分类',
                                            'value': [{'n': i['partition']['title'],
                                                       'v': f"{i['partition']['id_str']}@@{i['partition']['title']}"}
                                                      for i in self.dyifdata['categoryData']]}])
                    except json.JSONDecodeError as e:
                        print(f"douyin解析错误: {e}")
                        return 'douyin', None
    except Exception as e:
        print(f"douyin请求或处理错误: {e}")
        return 'douyin', None
|
||||
|
||||
def process_douyu(self):
    # Fetch douyu's category tree -> ('douyu', filter-spec), or ('douyu', None) on error.
    try:
        self.dyufdata = self.fetch(
            f'{self.referers["douyu"]}/api/cate/list',
            headers=self.headers[1]
        ).json()
        return ('douyu', [{'key': 'cate', 'name': '分类',
                           'value': [{'n': i['cate1Name'], 'v': str(i['cate1Id'])}
                                     for i in self.dyufdata['data']['cate1Info']]}])
    except Exception as e:
        print(f"douyu错误: {e}")
        return 'douyu', None
|
||||
|
||||
def homeContent(self, filter):
    """Build platform tabs and fetch per-platform category filters concurrently."""
    result = {}
    cateManual = {
        "虎牙": "huya",
        "哔哩": "bili",
        "抖音": "douyin",
        "斗鱼": "douyu",
        "网易": "wangyi"
    }
    classes = []
    # huya's categories are static; the other platforms are scraped below.
    filters = {
        'huya': [{'key': 'cate', 'name': '分类',
                  'value': [{'n': '网游', 'v': '1'}, {'n': '单机', 'v': '2'},
                            {'n': '娱乐', 'v': '8'}, {'n': '手游', 'v': '3'}]}]
    }

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(self.process_bili): 'bili',
            executor.submit(self.process_douyin): 'douyin',
            executor.submit(self.process_douyu): 'douyu'
        }

        for future in futures:
            platform, filter_data = future.result()
            if filter_data:
                filters[platform] = filter_data

    for k in cateManual:
        classes.append({
            'type_name': k,
            'type_id': cateManual[k]
        })

    result['class'] = classes
    result['filters'] = filters
    return result
|
||||
|
||||
def homeVideoContent(self):
    # No home recommendations for the live aggregator.
    pass
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
    """Dispatch the listing to the per-platform handler; paging is open-ended."""
    vdata = []
    result = {}
    pagecount = 9999
    result['page'] = pg
    result['limit'] = 90
    result['total'] = 999999
    if tid == 'wangyi':
        vdata, pagecount = self.wyccContent(tid, pg, filter, extend, vdata)
    elif 'bili' in tid:
        vdata, pagecount = self.biliContent(tid, pg, filter, extend, vdata)
    elif 'huya' in tid:
        vdata, pagecount = self.huyaContent(tid, pg, filter, extend, vdata)
    elif 'douyin' in tid:
        vdata, pagecount = self.douyinContent(tid, pg, filter, extend, vdata)
    elif 'douyu' in tid:
        vdata, pagecount = self.douyuContent(tid, pg, filter, extend, vdata)
    result['list'] = vdata
    result['pagecount'] = pagecount
    return result
|
||||
|
||||
def wyccContent(self, tid, pg, filter, extend, vdata):
    """List NetEase CC live rooms for one page; appends vods into `vdata`.

    Returns (vdata, pagecount). Pagecount is a sentinel 9999 because the
    API does not expose a total.
    """
    params = {
        'format': 'json',
        'start': (int(pg) - 1) * 20,  # API is offset-based, 20 rooms per page
        'size': '20',
    }
    response = self.fetch(f'{self.hosts[tid]}/api/category/live/', params=params, headers=self.headers[0]).json()
    for i in response['lives']:
        # Entries without a cuteid are not playable rooms; skip them.
        if i.get('cuteid'):
            bvdata = self.buildvod(
                vod_id=f"{tid}@@{i['cuteid']}",  # platform@@room-id convention
                vod_name=i.get('title'),
                vod_pic=i.get('cover'),
                vod_remarks=i.get('nickname'),
                style={"type": "rect", "ratio": 1.33}
            )
            vdata.append(bvdata)
    return vdata, 9999
|
||||
|
||||
def biliContent(self, tid, pg, filter, extend, vdata):
    """List Bilibili live rooms, or sub-category folders on first drill-down.

    With a 'cate' filter on page 1 (and no 'click_' prefix), emits folder
    vods built from the cached self.blfdata category tree; otherwise fetches
    a room list page. Returns (vdata, pagecount).
    """
    if extend.get('cate') and pg == '1' and 'click' not in tid:
        for i in self.blfdata['data']:
            if str(i['id']) == extend['cate']:
                for j in i['list']:
                    v = self.buildvod(
                        # Folder id encodes platform + parent area + area.
                        vod_id=f"click_{tid}@@{i['id']}@@{j['id']}",
                        vod_name=j.get('name'),
                        vod_pic=j.get('pic'),
                        vod_tag=1,  # mark as folder
                        style={"type": "oval", "ratio": 1}
                    )
                    vdata.append(v)
        return vdata, 1  # folder view is a single page
    else:
        path = f'/xlive/web-interface/v1/second/getListByArea?platform=web&sort=online&page_size=30&page={pg}'
        if 'click' in tid:
            # Unpack 'click_bili@@<parent_area>@@<area>' back into a list query.
            ids = tid.split('_')[1].split('@@')
            tid = ids[0]
            path = f'/xlive/web-interface/v1/second/getList?platform=web&parent_area_id={ids[1]}&area_id={ids[-1]}&sort_type=&page={pg}'
        data = self.fetch(f'{self.hosts[tid][0]}{path}', headers=self.gethr(0, tid)).json()
        for i in data['data']['list']:
            if i.get('roomid'):
                # NOTE: `data` is deliberately reused here as the vod dict,
                # shadowing the response after this point in the loop body.
                data = self.buildvod(
                    f"{tid}@@{i['roomid']}",
                    i.get('title'),
                    i.get('cover'),
                    i.get('watched_show', {}).get('text_large'),
                    0,
                    i.get('uname'),
                    style={"type": "rect", "ratio": 1.33}
                )
                vdata.append(data)
        return vdata, 9999
|
||||
|
||||
def huyaContent(self, tid, pg, filter, extend, vdata):
    """List Huya live rooms, or game folders on first drill-down.

    Same folder/list split as the other platforms. Returns (vdata, pagecount).
    """
    if extend.get('cate') and pg == '1' and 'click' not in tid:
        id = extend.get('cate')
        data = self.fetch(f'{self.referers[tid]}/liveconfig/game/bussLive?bussType={id}',
                          headers=self.headers[1]).json()
        for i in data['data']:
            v = self.buildvod(
                vod_id=f"click_{tid}@@{int(i['gid'])}",
                vod_name=i.get('gameFullName'),
                # Game icon follows Huya's fixed CDN naming scheme.
                vod_pic=f'https://huyaimg.msstatic.com/cdnimage/game/{int(i["gid"])}-MS.jpg',
                vod_tag=1,  # folder
                style={"type": "oval", "ratio": 1}
            )
            vdata.append(v)
        return vdata, 1
    else:
        gid = ''
        if 'click' in tid:
            # 'click_huya@@<gameId>' -> filter the room list by game.
            ids = tid.split('_')[1].split('@@')
            tid = ids[0]
            gid = f'&gameId={ids[1]}'
        data = self.fetch(f'{self.hosts[tid][0]}/cache.php?m=LiveList&do=getLiveListByPage&tagAll=0{gid}&page={pg}',
                          headers=self.headers[1]).json()
        for i in data['data']['datas']:
            if i.get('profileRoom'):
                v = self.buildvod(
                    f"{tid}@@{i['profileRoom']}",
                    i.get('introduction'),
                    i.get('screenshot'),
                    # Viewer count rendered in units of 10k ('万').
                    str(int(i.get('totalCount', '1')) / 10000) + '万',
                    0,
                    i.get('nick'),
                    style={"type": "rect", "ratio": 1.33}

                )
                vdata.append(v)
        return vdata, 9999
|
||||
|
||||
def douyinContent(self, tid, pg, filter, extend, vdata):
    """List Douyin live rooms, or partition folders on first drill-down.

    The 'cate' filter value is '<partition_id>@@<title>'; folders come from
    the cached self.dyifdata tree. Returns (vdata, pagecount).
    """
    if extend.get('cate') and pg == '1' and 'click' not in tid:
        ids = extend.get('cate').split('@@')
        for i in self.dyifdata['categoryData']:
            c = i['partition']
            if c['id_str'] == ids[0] and c['title'] == ids[1]:
                # Prepend the parent partition so it appears as the first folder.
                vlist = i['sub_partition'].copy()
                vlist.insert(0, {'partition': c})
                for j in vlist:
                    j = j['partition']
                    v = self.buildvod(
                        vod_id=f"click_{tid}@@{j['id_str']}@@{j['type']}",
                        vod_name=j.get('title'),
                        # Partitions have no artwork; use the Douyin PWA icon.
                        vod_pic='https://p3-pc-weboff.byteimg.com/tos-cn-i-9r5gewecjs/pwa_v3/512x512-1.png',
                        vod_tag=1,  # folder
                        style={"type": "oval", "ratio": 1}
                    )
                    vdata.append(v)
        return vdata, 1
    else:
        # Default list: partition 720 ("recommended"); 15 rooms per page.
        path = f'/webcast/web/partition/detail/room/?aid=6383&app_name=douyin_web&live_id=1&device_platform=web&count=15&offset={(int(pg) - 1) * 15}&partition=720&partition_type=1'
        if 'click' in tid:
            ids = tid.split('_')[1].split('@@')
            tid = ids[0]
            path = f'/webcast/web/partition/detail/room/?aid=6383&app_name=douyin_web&live_id=1&device_platform=web&count=15&offset={(int(pg) - 1) * 15}&partition={ids[1]}&partition_type={ids[-1]}&req_from=2'
        data = self.fetch(f'{self.hosts[tid]}{path}', headers=self.dyheaders).json()
        for i in data['data']['data']:
            v = self.buildvod(
                vod_id=f"{tid}@@{i['web_rid']}",
                vod_name=i['room'].get('title'),
                vod_pic=i['room']['cover'].get('url_list')[0],
                vod_year=i.get('user_count_str'),
                vod_remarks=i['room']['owner'].get('nickname'),
                style={"type": "rect", "ratio": 1.33}
            )
            vdata.append(v)
        return vdata, 9999
|
||||
|
||||
def douyuContent(self, tid, pg, filter, extend, vdata):
    """List Douyu live rooms, or second-level category folders on drill-down.

    Folders come from the cached self.dyufdata tree filtered by the selected
    first-level category. Returns (vdata, pagecount).
    """
    if extend.get('cate') and pg == '1' and 'click' not in tid:
        for i in self.dyufdata['data']['cate2Info']:
            if str(i['cate1Id']) == extend['cate']:
                v = self.buildvod(
                    vod_id=f"click_{tid}@@{i['cate2Id']}",
                    vod_name=i.get('cate2Name'),
                    vod_pic=i.get('icon'),
                    vod_remarks=i.get('count'),
                    vod_tag=1,  # folder
                    style={"type": "oval", "ratio": 1}
                )
                vdata.append(v)
        return vdata, 1
    else:
        # Default: the "all rooms" paged listing.
        path = f'/japi/weblist/apinc/allpage/6/{pg}'
        if 'click' in tid:
            ids = tid.split('_')[1].split('@@')
            tid = ids[0]
            path = f'/gapi/rkc/directory/mixList/2_{ids[1]}/{pg}'
        url = f'{self.hosts[tid]}{path}'
        data = self.fetch(url, headers=self.headers[1]).json()
        for i in data['data']['rl']:
            v = self.buildvod(
                vod_id=f"{tid}@@{i['rid']}",
                vod_name=i.get('rn'),
                vod_pic=i.get('rs16'),
                # Online count rendered in units of 10k ('万').
                vod_year=str(int(i.get('ol', 1)) / 10000) + '万',
                vod_remarks=i.get('nn'),
                style={"type": "rect", "ratio": 1.33}
            )
            vdata.append(v)
        return vdata, 9999
|
||||
|
||||
def detailContent(self, ids):
    """Dispatch a detail request to the platform-specific handler.

    ids[0] is '<platform>@@<room id>'. Bug fix: previously an unrecognized
    platform prefix left `vod` unbound and raised NameError; now an empty
    vod dict is returned so the caller still gets a well-formed response.
    """
    ids = ids[0].split('@@')
    vod = {}  # safe fallback for unknown platform prefixes
    if ids[0] == 'wangyi':
        vod = self.wyccDetail(ids)
    elif ids[0] == 'bili':
        vod = self.biliDetail(ids)
    elif ids[0] == 'huya':
        vod = self.huyaDetail(ids)
    elif ids[0] == 'douyin':
        vod = self.douyinDetail(ids)
    elif ids[0] == 'douyu':
        vod = self.douyuDetail(ids)
    return {'list': [vod]}
|
||||
|
||||
def wyccDetail(self, ids):
    """Build the detail vod for a NetEase CC room.

    Parses the JSON embedded in the page's last <script> tag, groups stream
    URLs by CDN (one play line per CDN), and base64-encodes each line's
    [quality, url, ...] list into the play id.
    """
    try:
        vdata = self.getpq(f'{self.hosts[ids[0]]}/{ids[1]}', self.headers[0])('script').eq(-1).text()

        def get_quality_name(vbr):
            # Map bitrate buckets to display labels (SD/HD/UHD/Blu-ray).
            if vbr <= 600:
                return "标清"
            elif vbr <= 1000:
                return "高清"
            elif vbr <= 2000:
                return "超清"
            else:
                return "蓝光"

        data = json.loads(vdata)['props']['pageProps']['roomInfoInitData']
        name = data['live'].get('title', ids[0])
        vod = self.buildvod(vod_name=data.get('keywords_suffix'), vod_remarks=data['live'].get('title'),
                            vod_content=data.get('description_suffix'))
        resolution_data = data['live']['quickplay']['resolution']
        all_streams = {}
        # Highest bitrate first so each CDN's list starts with the best quality.
        sorted_qualities = sorted(resolution_data.items(),
                                  key=lambda x: x[1]['vbr'],
                                  reverse=True)
        # NOTE: the loop variable `data` shadows the page JSON above; the
        # outer `data` is not used again after this point.
        for quality, data in sorted_qualities:
            vbr = data['vbr']
            quality_name = get_quality_name(vbr)
            for cdn_name, url in data['cdn'].items():
                # Only register a CDN once, and only for real http(s) URLs.
                if cdn_name not in all_streams and type(url) == str and url.startswith('http'):
                    all_streams[cdn_name] = []
                if isinstance(url, str) and url.startswith('http'):
                    all_streams[cdn_name].extend([quality_name, url])
        plists = []
        names = []
        for i, (cdn_name, stream_list) in enumerate(all_streams.items(), 1):
            names.append(f'线路{i}')
            # Play id format: <title>$<platform>@@<b64([quality, url, ...])>
            pstr = f"{name}${ids[0]}@@{self.e64(json.dumps(stream_list))}"
            plists.append(pstr)
        vod['vod_play_from'] = "$$$".join(names)
        vod['vod_play_url'] = "$$$".join(plists)
        return vod
    except Exception as e:
        return self.handle_exception(e)
|
||||
|
||||
def biliDetail(self, ids):
    """Build the detail vod for a Bilibili live room.

    Fetches room info, then the play-info endpoint to enumerate the
    accepted quality levels (qn); each episode id encodes platform, room
    and qn so biliplay can request that quality directly.
    """
    try:
        vdata = self.fetch(
            f'{self.hosts[ids[0]][0]}/xlive/web-room/v1/index/getInfoByRoom?room_id={ids[1]}&wts={int(time.time())}',
            headers=self.gethr(0, ids[0])).json()
        v = vdata['data']['room_info']
        vod = self.buildvod(
            vod_name=v.get('title'),
            type_name=v.get('parent_area_name') + '/' + v.get('area_name'),
            vod_remarks=v.get('tags'),
            vod_play_from=v.get('title'),
        )
        data = self.fetch(
            f'{self.hosts[ids[0]][0]}/xlive/web-room/v2/index/getRoomPlayInfo?room_id={ids[1]}&protocol=0%2C1&format=0%2C1%2C2&codec=0%2C1&platform=web',
            headers=self.gethr(0, ids[0])).json()
        vdnams = data['data']['playurl_info']['playurl']['g_qn_desc']
        all_accept_qns = []
        streams = data['data']['playurl_info']['playurl']['stream']
        # Collect every codec's accept_qn list; the longest one is assumed to
        # cover all the qualities this room can serve.
        for stream in streams:
            for format_item in stream['format']:
                for codec in format_item['codec']:
                    if 'accept_qn' in codec:
                        all_accept_qns.append(codec['accept_qn'])
        max_accept_qn = max(all_accept_qns, key=len) if all_accept_qns else []
        # qn -> human-readable quality description.
        quality_map = {
            item['qn']: item['desc']
            for item in vdnams
        }
        quality_names = [f"{quality_map.get(qn)}${ids[0]}@@{ids[1]}@@{qn}" for qn in max_accept_qn]
        vod['vod_play_url'] = "#".join(quality_names)
        return vod
    except Exception as e:
        return self.handle_exception(e)
|
||||
|
||||
def huyaDetail(self, ids):
    """Build the detail vod for a Huya room.

    Each stream type becomes one play line; within a line, each CDN becomes
    one episode whose id carries a base64 [quality, url, ...] list. Quality
    URLs are derived from the CDN base URL by rewriting the bitrate token.
    """
    try:
        vdata = self.fetch(f'{self.hosts[ids[0]][1]}/cache.php?m=Live&do=profileRoom&roomid={ids[1]}',
                           headers=self.headers[0]).json()
        v = vdata['data']['liveData']
        vod = self.buildvod(
            vod_name=v.get('introduction'),
            type_name=v.get('gameFullName'),
            vod_director=v.get('nick'),
            vod_remarks=v.get('contentIntro'),
        )
        # Reverse the stream-type order so the preferred type is listed first.
        data = dict(reversed(list(vdata['data']['stream'].items())))
        names = []
        plist = []

        for stream_type, stream_data in data.items():
            if isinstance(stream_data, dict) and 'multiLine' in stream_data and 'rateArray' in stream_data:
                names.append(f"线路{len(names) + 1}")
                # Highest bitrate (then name) first.
                qualities = sorted(
                    stream_data['rateArray'],
                    key=lambda x: (x['iBitRate'], x['sDisplayName']),
                    reverse=True
                )
                cdn_urls = []
                for cdn in stream_data['multiLine']:
                    quality_urls = []
                    for quality in qualities:
                        quality_name = quality['sDisplayName']
                        bit_rate = quality['iBitRate']
                        base_url = cdn['url']
                        if bit_rate > 0:
                            # Rewrite the default-bitrate URL for this quality:
                            # HLS uses a 'ratio=' query, FLV a filename suffix.
                            if '.m3u8' in base_url:
                                new_url = base_url.replace(
                                    'ratio=2000',
                                    f'ratio={bit_rate}'
                                )
                            else:
                                new_url = base_url.replace(
                                    'imgplus.flv',
                                    f'imgplus_{bit_rate}.flv'
                                )
                        else:
                            # bitrate 0 means the original/default stream.
                            new_url = base_url
                        quality_urls.extend([quality_name, new_url])
                    encoded_urls = self.e64(json.dumps(quality_urls))
                    cdn_urls.append(f"{cdn['cdnType']}${ids[0]}@@{encoded_urls}")

                if cdn_urls:
                    plist.append('#'.join(cdn_urls))
        vod['vod_play_from'] = "$$$".join(names)
        vod['vod_play_url'] = "$$$".join(plist)
        return vod
    except Exception as e:
        return self.handle_exception(e)
|
||||
|
||||
def douyinDetail(self, ids):
    """Build the detail vod for a Douyin room.

    Decodes the embedded stream_data JSON, splits it into 'main'/'backup'
    lines, and within each line groups URLs by container (flv/hls/lls);
    each group is base64-encoded into one episode id.
    """
    url = f'{self.hosts[ids[0]]}/webcast/room/web/enter/?aid=6383&app_name=douyin_web&live_id=1&device_platform=web&enter_from=web_live&web_rid={ids[1]}&room_id_str=&enter_source=&Room-Enter-User-Login-Ab=0&is_need_double_stream=false&cookie_enabled=true&screen_width=1980&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Edge&browser_version=125.0.0.0'
    data = self.fetch(url, headers=self.dyheaders).json()
    try:
        vdata = data['data']['data'][0]
        vod = self.buildvod(
            vod_name=vdata['title'],
            vod_remarks=vdata['user_count_str'],
        )
        resolution_data = vdata['stream_url']['live_core_sdk_data']['pull_data']['options']['qualities']
        # stream_data is itself a JSON string inside the response.
        stream_json = vdata['stream_url']['live_core_sdk_data']['pull_data']['stream_data']
        stream_json = json.loads(stream_json)
        available_types = []
        # Only offer a line if at least one quality exposes it.
        if any(sdk_key in stream_json['data'] and 'main' in stream_json['data'][sdk_key] for sdk_key in
               stream_json['data']):
            available_types.append('main')
        if any(sdk_key in stream_json['data'] and 'backup' in stream_json['data'][sdk_key] for sdk_key in
               stream_json['data']):
            available_types.append('backup')
        plist = []
        for line_type in available_types:
            format_arrays = {'flv': [], 'hls': [], 'lls': []}
            # Highest quality level first.
            qualities = sorted(resolution_data, key=lambda x: x['level'], reverse=True)
            for quality in qualities:
                sdk_key = quality['sdk_key']
                if sdk_key in stream_json['data'] and line_type in stream_json['data'][sdk_key]:
                    stream_info = stream_json['data'][sdk_key][line_type]
                    if stream_info.get('flv'):
                        format_arrays['flv'].extend([quality['name'], stream_info['flv']])
                    if stream_info.get('hls'):
                        format_arrays['hls'].extend([quality['name'], stream_info['hls']])
                    if stream_info.get('lls'):
                        format_arrays['lls'].extend([quality['name'], stream_info['lls']])
            format_urls = []
            for format_name, url_array in format_arrays.items():
                if url_array:
                    encoded_urls = self.e64(json.dumps(url_array))
                    format_urls.append(f"{format_name}${ids[0]}@@{encoded_urls}")

            if format_urls:
                plist.append('#'.join(format_urls))

        # One display name per surviving line (main and/or backup).
        names = ['线路1', '线路2'][:len(plist)]
        vod['vod_play_from'] = "$$$".join(names)
        vod['vod_play_url'] = "$$$".join(plist)
        return vod

    except Exception as e:
        return self.handle_exception(e)
|
||||
|
||||
def douyuDetail(self, ids):
    """Build the detail vod for a Douyu room.

    Fetches room metadata, extracts the obfuscated signing JS via
    douyu_text, sends it to an external signing service, then enumerates
    CDNs from getH5Play; each episode id carries the sign + CDN + the
    available rate list (both base64-encoded) for douyuplay.
    """
    headers = self.gethr(0, zr=f'{self.hosts[ids[0]]}/{ids[1]}')
    try:
        data = self.fetch(f'{self.hosts[ids[0]]}/betard/{ids[1]}', headers=headers).json()
        vname = data['room']['room_name']
        vod = self.buildvod(
            vod_name=vname,
            vod_remarks=data['room'].get('second_lvl_name'),
            vod_director=data['room'].get('nickname'),
        )
        vdata = self.fetch(f'{self.hosts[ids[0]]}/swf_api/homeH5Enc?rids={ids[1]}', headers=headers).json()
        json_body = vdata['data']
        # Trim the obfuscated JS blob before shipping it to the sign service.
        json_body = {"html": self.douyu_text(json_body[f'room{ids[1]}']), "rid": ids[1]}
        # NOTE(review): third-party signing endpoint — room id and script are
        # sent off-site; verify this service is still trusted/available.
        sign = self.post('http://alive.nsapps.cn/api/AllLive/DouyuSign', json=json_body, headers=self.headers[1]).json()['data']
        body = f'{sign}&cdn=&rate=-1&ver=Douyu_223061205&iar=1&ive=1&hevc=0&fa=0'
        body=self.params_to_json(body)
        nubdata = self.post(f'{self.hosts[ids[0]]}/lapi/live/getH5Play/{ids[1]}', data=body, headers=headers).json()
        plist = []
        names = []
        for i,x in enumerate(nubdata['data']['cdnsWithName']):
            names.append(f'线路{i+1}')
            d = {'sign': sign, 'cdn': x['cdn'], 'id': ids[1]}
            plist.append(
                f'{vname}${ids[0]}@@{self.e64(json.dumps(d))}@@{self.e64(json.dumps(nubdata["data"]["multirates"]))}')
        vod['vod_play_from'] = "$$$".join(names)
        vod['vod_play_url'] = "$$$".join(plist)
        return vod
    except Exception as e:
        return self.handle_exception(e)
|
||||
|
||||
def douyu_text(self, text):
|
||||
function_positions = [m.start() for m in re.finditer('function', text)]
|
||||
total_functions = len(function_positions)
|
||||
if total_functions % 2 == 0:
|
||||
target_index = total_functions // 2 + 1
|
||||
else:
|
||||
target_index = (total_functions - 1) // 2 + 1
|
||||
if total_functions >= target_index:
|
||||
cut_position = function_positions[target_index - 1]
|
||||
ctext = text[4:cut_position]
|
||||
return re.sub(r'eval\(strc\)\([\w\d,]+\)', 'strc', ctext)
|
||||
return text
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
    # Live rooms are not searchable through this spider; hook intentionally
    # returns nothing.
    pass
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
    """Resolve an episode id into a playable URL (or URL list).

    id is '<platform>@@<payload...>'. wangyi/douyin/huya payloads are a
    base64-encoded JSON [quality, url, ...] list; bili and douyu need an
    extra request. Bug fix: removed the unreachable `elif ids[0] == 'huya'`
    branch — 'huya' was already handled by the first membership test.
    """
    try:
        ids = id.split('@@')
        p = 1
        if ids[0] in ['wangyi', 'douyin', 'huya']:
            # Payload is ready-made: decode the [quality, url, ...] list.
            p, url = 0, json.loads(self.d64(ids[1]))
        elif ids[0] == 'bili':
            p, url = self.biliplay(ids)
        elif ids[0] == 'douyu':
            p, url = self.douyuplay(ids)
        return {'parse': p, 'url': url, 'header': self.playheaders[ids[0]]}
    except Exception as e:
        # Any failure (including an unknown platform leaving `url` unset)
        # falls back to the parse-able error URL.
        return {'parse': 1, 'url': self.excepturl, 'header': self.headers[0]}
|
||||
|
||||
def biliplay(self, ids):
    """Fetch Bilibili play URLs for room ids[1] at quality qn=ids[2].

    Returns (0, [label, url, ...]) on success — one '线路N' label per
    assembled URL — or (1, self.excepturl) on any failure.
    """
    try:
        data = self.fetch(
            f'{self.hosts[ids[0]][0]}/xlive/web-room/v2/index/getRoomPlayInfo?room_id={ids[1]}&protocol=0,1&format=0,2&codec=0&platform=web&qn={ids[2]}',
            headers=self.gethr(0, ids[0])).json()
        urls = []
        line_index = 1
        for stream in data['data']['playurl_info']['playurl']['stream']:
            for format_item in stream['format']:
                for codec in format_item['codec']:
                    for url_info in codec['url_info']:
                        # host + base_url + extra make one complete URL.
                        full_url = f"{url_info['host']}/{codec['base_url'].lstrip('/')}{url_info['extra']}"
                        urls.extend([f"线路{line_index}", full_url])
                        line_index += 1
        return 0, urls
    except Exception as e:
        return 1, self.excepturl
|
||||
|
||||
def douyuplay(self, ids):
    """Resolve Douyu play URLs for every advertised rate, in parallel.

    ids[1] is a base64 JSON {sign, cdn, id}; ids[2] a base64 JSON rate list.
    douyufp fills result_obj[bit] with [name, url, ...]; results are flattened
    highest bitrate first. Returns (0, urls) or (1, self.excepturl).
    """
    try:
        sdata = json.loads(self.d64(ids[1]))
        headers = self.gethr(0, zr=f'{self.hosts[ids[0]]}/{sdata["id"]}')
        ldata = json.loads(self.d64(ids[2]))
        result_obj = {}
        # One worker per rate; douyufp writes into result_obj keyed by bitrate.
        with ThreadPoolExecutor(max_workers=len(ldata)) as executor:
            futures = [
                executor.submit(
                    self.douyufp,
                    sdata,
                    quality,
                    headers,
                    self.hosts[ids[0]],
                    result_obj
                ) for quality in ldata
            ]
            for future in futures:
                future.result()

        result = []
        # Highest bitrate first.
        for bit in sorted(result_obj.keys(), reverse=True):
            result.extend(result_obj[bit])

        if result:
            return 0, result
        return 1, self.excepturl

    except Exception as e:
        return 1, self.excepturl
|
||||
|
||||
def douyufp(self, sdata, quality, headers, host, result_obj):
    """Worker for douyuplay: fetch one rate's play URL into result_obj.

    Appends [quality name, url] under result_obj[bit]. Errors are printed
    and swallowed so one failed rate doesn't sink the others (best-effort
    by design).
    """
    try:
        body = f'{sdata["sign"]}&cdn={sdata["cdn"]}&rate={quality["rate"]}'
        body=self.params_to_json(body)
        data = self.post(f'{host}/lapi/live/getH5Play/{sdata["id"]}',
                         data=body, headers=headers).json()
        if data.get('data'):
            play_url = data['data']['rtmp_url'] + '/' + data['data']['rtmp_live']
            bit = quality.get('bit', 0)
            # NOTE: plain dict mutation from worker threads; distinct rates
            # use distinct keys so collisions are unlikely but not guarded.
            if bit not in result_obj:
                result_obj[bit] = []
            result_obj[bit].extend([quality['name'], play_url])
    except Exception as e:
        print(f"Error fetching {quality['name']}: {str(e)}")
|
||||
|
||||
def localProxy(self, param):
    # No local proxying needed for these platforms; hook intentionally empty.
    pass
|
||||
|
||||
def e64(self, text):
    """Base64-encode a UTF-8 string; returns '' (and logs) on any failure."""
    try:
        return b64encode(text.encode('utf-8')).decode('utf-8')
    except Exception as e:
        print(f"Base64编码错误: {str(e)}")
        return ""
|
||||
|
||||
def d64(self, encoded_text):
    """Decode a base64 string to UTF-8 text; returns '' (and logs) on failure."""
    try:
        return b64decode(encoded_text.encode('utf-8')).decode('utf-8')
    except Exception as e:
        print(f"Base64解码错误: {str(e)}")
        return ""
|
||||
|
||||
def josn_to_params(self, params, skip_empty=False):
    """Serialize a dict into a 'k=v&k=v' query string.

    With skip_empty=True, falsy values are dropped. (The typo'd name is
    kept — existing callers depend on it.)
    """
    return "&".join(
        f"{key}={value}"
        for key, value in params.items()
        if not (skip_empty and not value)
    )
|
||||
|
||||
def params_to_json(self, query_string):
    """Parse a query string into a flat dict, keeping each key's first value.

    Blank values are dropped (parse_qs default), matching the original.
    """
    return {key: values[0] for key, values in parse_qs(query_string).items()}
|
||||
|
||||
def buildvod(self, vod_id='', vod_name='', vod_pic='', vod_year='', vod_tag='', vod_remarks='', style='',
             type_name='', vod_area='', vod_actor='', vod_director='',
             vod_content='', vod_play_from='', vod_play_url=''):
    """Assemble a vod dict, omitting every falsy field.

    Any truthy vod_tag collapses to the literal 'folder' (the app's marker
    for drill-down entries).
    """
    fields = (
        ('vod_id', vod_id),
        ('vod_name', vod_name),
        ('vod_pic', vod_pic),
        ('vod_year', vod_year),
        ('vod_tag', 'folder' if vod_tag else ''),
        ('vod_remarks', vod_remarks),
        ('style', style),
        ('type_name', type_name),
        ('vod_area', vod_area),
        ('vod_actor', vod_actor),
        ('vod_director', vod_director),
        ('vod_content', vod_content),
        ('vod_play_from', vod_play_from),
        ('vod_play_url', vod_play_url),
    )
    return {field: value for field, value in fields if value}
|
||||
|
||||
def getpq(self, url, headers=None, cookies=None):
    """Fetch a page and parse it with pyquery.

    Falls back to parsing the raw bytes when the str form fails (some pages
    trip pyquery/lxml's unicode handling).
    """
    data = self.fetch(url, headers=headers, cookies=cookies).text
    try:
        return pq(data)
    except Exception as e:
        print(f"解析页面错误: {str(e)}")
        return pq(data.encode('utf-8'))
|
||||
|
||||
def gethr(self, index, rf='', zr=''):
    """Return headers[index] with a referer set.

    `zr` is an explicit referer URL; otherwise the referer is derived from
    self.referers[rf]. Bug fix: the original returned the shared
    self.headers[index] dict and mutated it, so a referer set for one
    platform leaked into every later request using those headers; we now
    return a shallow copy.
    """
    headers = dict(self.headers[index])  # copy: never mutate the shared template
    headers['referer'] = zr if zr else f"{self.referers[rf]}/"
    return headers
|
||||
|
||||
def handle_exception(self, e):
    """Log a detail-page failure and return a placeholder vod.

    The placeholder shows a "crashed" play line pointing at self.excepturl
    so the UI renders an entry instead of erroring out.
    """
    print(f"报错: {str(e)}")
    return {'vod_play_from': '哎呀翻车啦', 'vod_play_url': f'翻车啦${self.excepturl}'}
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import sys
|
||||
import threading
|
||||
import uuid
|
||||
import requests
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
import time
|
||||
from Crypto.Hash import MD5, SHA1
|
||||
|
||||
class Spider(Spider):
    '''
    Configuration example:
    {
        "key": "xxxx",
        "name": "xxxx",
        "type": 3,
        "api": ".所在路径/金牌.py",
        "searchable": 1,
        "quickSearch": 1,
        "filterable": 1,
        "changeable": 1,
        "ext": {
            "site": "https://www.jiabaide.cn,域名2,域名3"
        }
    },
    '''
    def init(self, extend=""):
        # "site" may be a comma-separated list of mirrors; pick the fastest.
        # NOTE(review): self.host is only set when extend is provided —
        # every other method assumes it exists.
        if extend:
            hosts=json.loads(extend)['site']
            self.host = self.host_late(hosts)
        pass

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    def homeContent(self, filter):
        """Build categories plus per-category filter groups from the site API."""
        cdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/get/filer/type", headers=self.getheaders()).json()
        fdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/v1/get/filer/list", headers=self.getheaders()).json()
        result = {}
        classes = []
        filters={}
        for k in cdata['data']:
            classes.append({
                'type_name': k['typeName'],
                'type_id': str(k['typeId']),
            })
        sort_values = [{"n": "最近更新", "v": "2"},{"n": "人气高低", "v": "3"}, {"n": "评分高低", "v": "4"}]
        for tid, d in fdata['data'].items():
            current_sort_values = sort_values.copy()
            # Category '1' does not support the "recently updated" sort.
            if tid == '1':
                del current_sort_values[0]
            filters[tid] = [
                {"key": "type", "name": "类型",
                 "value": [{"n": i["itemText"], "v": i["itemValue"]} for i in d["typeList"]]},

                # Plot filter only when the category actually has plot tags.
                *([] if not d["plotList"] else [{"key": "v_class", "name": "剧情",
                                                "value": [{"n": i["itemText"], "v": i["itemText"]}
                                                          for i in d["plotList"]]}]),

                {"key": "area", "name": "地区",
                 "value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["districtList"]]},

                {"key": "year", "name": "年份",
                 "value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["yearList"]]},

                {"key": "lang", "name": "语言",
                 "value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["languageList"]]},

                {"key": "sort", "name": "排序", "value": current_sort_values}
            ]
        result['class'] = classes
        result['filters'] = filters
        return result

    def homeVideoContent(self):
        """Merge the home-page sections and the hot-search list into one feed."""
        data1 = self.fetch(f"{self.host}/api/mw-movie/anonymous/v1/home/all/list", headers=self.getheaders()).json()
        data2=self.fetch(f"{self.host}/api/mw-movie/anonymous/home/hotSearch",headers=self.getheaders()).json()
        data=[]
        for i in data1['data'].values():
            data.extend(i['list'])
        data.extend(data2['data'])
        vods=self.getvod(data)
        return {'list':vods}

    def categoryContent(self, tid, pg, filter, extend):
        """Paged, filtered listing for one category (signed query)."""
        # NOTE: the URL query is built before getheaders() injects the
        # signing key/timestamp into `params`; the server recomputes the
        # signature from the header fields.
        params = {
            "area": extend.get('area', ''),
            "filterStatus": "1",
            "lang": extend.get('lang', ''),
            "pageNum": pg,
            "pageSize": "30",
            "sort": extend.get('sort', '1'),
            "sortBy": "1",
            "type": extend.get('type', ''),
            "type1": tid,
            "v_class": extend.get('v_class', ''),
            "year": extend.get('year', '')
        }
        data = self.fetch(f"{self.host}/api/mw-movie/anonymous/video/list?{self.js(params)}", headers=self.getheaders(params)).json()
        result = {}
        result['list'] = self.getvod(data['data']['list'])
        result['page'] = pg
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        """Fetch one title's detail and flatten its episode list into play URLs."""
        data=self.fetch(f"{self.host}/api/mw-movie/anonymous/video/detail?id={ids[0]}",headers=self.getheaders({'id':ids[0]})).json()
        vod=self.getvod([data['data']])[0]
        vod['vod_play_from']='金牌'
        # Single-episode titles reuse the title name as the episode label.
        vod['vod_play_url'] = '#'.join(
            f"{i['name'] if len(vod['episodelist']) > 1 else vod['vod_name']}${ids[0]}@@{i['nid']}" for i in
            vod['episodelist'])
        vod.pop('episodelist', None)
        return {'list':[vod]}

    def searchContent(self, key, quick, pg="1"):
        """Keyword search, 8 results per page (signed query)."""
        params = {
            "keyword": key,
            "pageNum": pg,
            "pageSize": "8",
            "sourceCode": "1"
        }
        data=self.fetch(f"{self.host}/api/mw-movie/anonymous/video/searchByWord?{self.js(params)}",headers=self.getheaders(params)).json()
        vods=self.getvod(data['data']['result']['list'])
        return {'list':vods,'page':pg}

    def playerContent(self, flag, id, vipFlags):
        """Resolve '<id>@@<nid>' into a direct multi-resolution URL list."""
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
            'sec-ch-ua-platform': '"Windows"',
            'DNT': '1',
            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
            'sec-ch-ua-mobile': '?0',
            'Origin': self.host,
            'Referer': f'{self.host}/'
        }
        ids=id.split('@@')
        pdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/v2/video/episode/url?clientType=1&id={ids[0]}&nid={ids[1]}",headers=self.getheaders({'clientType':'1','id': ids[0], 'nid': ids[1]})).json()
        vlist=[]
        # Flatten to [resolution, url, resolution, url, ...] as the app expects.
        for i in pdata['data']['list']:vlist.extend([i['resolutionName'],i['url']])
        return {'parse':0,'url':vlist,'header':self.header}

    def localProxy(self, param):
        pass

    def host_late(self, url_list):
        """Pick the lowest-latency mirror via parallel HEAD probes.

        Accepts a comma-separated string or a list; unreachable hosts score
        infinity so any responsive mirror wins.
        """
        if isinstance(url_list, str):
            urls = [u.strip() for u in url_list.split(',')]
        else:
            urls = url_list
        if len(urls) <= 1:
            return urls[0] if urls else ''

        results = {}
        threads = []

        def test_host(url):
            try:
                start_time = time.time()
                response = requests.head(url, timeout=1.0, allow_redirects=False)
                delay = (time.time() - start_time) * 1000  # ms
                results[url] = delay
            except Exception as e:
                results[url] = float('inf')
        for url in urls:
            t = threading.Thread(target=test_host, args=(url,))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
        return min(results.items(), key=lambda x: x[1])[0]

    def md5(self, sign_key):
        """Hex MD5 of a string (used as the inner digest for request signing)."""
        md5_hash = MD5.new()
        md5_hash.update(sign_key.encode('utf-8'))
        md5_result = md5_hash.hexdigest()
        return md5_result

    def js(self, param):
        """Serialize a dict into a 'k=v&k=v' query string (no escaping)."""
        return '&'.join(f"{k}={v}" for k, v in param.items())

    def getheaders(self, param=None):
        """Build signed request headers: sign = SHA1(MD5(query + key + t)).

        Mutates `param` in place by appending the site key and timestamp —
        callers rely on having built their URL query beforehand.
        """
        if param is None:param = {}
        t=str(int(time.time()*1000))
        param['key']='cb808529bae6b6be45ecfab29a4889bc'  # site-wide static API key
        param['t']=t
        sha1_hash = SHA1.new()
        sha1_hash.update(self.md5(self.js(param)).encode('utf-8'))
        sign = sha1_hash.hexdigest()
        deviceid = str(uuid.uuid4())  # fresh device id per request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'sign': sign,
            't': t,
            'deviceid':deviceid
        }
        return headers

    def convert_field_name(self, field):
        """Map API field names to app conventions: 'vodName' -> 'vod_name' etc."""
        field = field.lower()
        if field.startswith('vod') and len(field) > 3:
            field = field.replace('vod', 'vod_')
        if field.startswith('type') and len(field) > 4:
            field = field.replace('type', 'type_')
        return field

    def getvod(self, array):
        """Normalize a list of API items into vod dicts via convert_field_name."""
        return [{self.convert_field_name(k): v for k, v in item.items()} for item in array]
|
||||
|
BIN
spider.jar
BIN
spider.jar
Binary file not shown.
Loading…
Reference in New Issue