#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Spider for the short-drama site 偷乐短剧 (toule.top)

import sys
import json
import re
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup

# Import the base Spider class
sys.path.append('../../')
try:
    from base.spider import Spider
except ImportError:
    # Stand-in implementation for local debugging
    class Spider:
        def init(self, extend=""):
            pass

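# Optional hygiene (an addition, not part of the original spider): fetch()
# below disables TLS verification (verify=False), so urllib3 would emit an
# InsecureRequestWarning on every request. Silence it for cleaner logs.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
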
class Spider(Spider):
    def __init__(self):
        # Site root URL
        self.siteUrl = "https://www.toule.top"

        # On this site, category pages follow the pattern:
        #   /index.php/vod/show/class/<URL-encoded category name>/id/1.html
        # Category map extracted from the site navigation
        self.cateManual = {
            "男频": "/index.php/vod/show/class/%E7%94%B7%E9%A2%91/id/1.html",
            "女频": "/index.php/vod/show/class/%E5%A5%B3%E9%A2%91/id/1.html",
            "都市": "/index.php/vod/show/class/%E9%83%BD%E5%B8%82/id/1.html",
            "赘婿": "/index.php/vod/show/class/%E8%B5%98%E5%A9%BF/id/1.html",
            "战神": "/index.php/vod/show/class/%E6%88%98%E7%A5%9E/id/1.html",
            "古代言情": "/index.php/vod/show/class/%E5%8F%A4%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
            "现代言情": "/index.php/vod/show/class/%E7%8E%B0%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
            "历史": "/index.php/vod/show/class/%E5%8E%86%E5%8F%B2/id/1.html",
            "玄幻": "/index.php/vod/show/class/%E7%8E%84%E5%B9%BB/id/1.html",
            "搞笑": "/index.php/vod/show/class/%E6%90%9E%E7%AC%91/id/1.html",
            "甜宠": "/index.php/vod/show/class/%E7%94%9C%E5%AE%A0/id/1.html",
            "励志": "/index.php/vod/show/class/%E5%8A%B1%E5%BF%97/id/1.html",
            "逆袭": "/index.php/vod/show/class/%E9%80%86%E8%A2%AD/id/1.html",
            "穿越": "/index.php/vod/show/class/%E7%A9%BF%E8%B6%8A/id/1.html",
            "古装": "/index.php/vod/show/class/%E5%8F%A4%E8%A3%85/id/1.html"
        }

        # Default request headers
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "Referer": "https://www.toule.top/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        # Simple in-memory cache: value store plus per-key expiry timestamps
        self.cache = {}
        self.cache_timeout = {}

    def getName(self):
        return "偷乐短剧"

    def init(self, extend=""):
        # Initialization hook required by the framework; nothing to do here
        return

    def isVideoFormat(self, url):
        """Return True if the URL looks like a direct video resource."""
        video_formats = ['.mp4', '.m3u8', '.ts', '.flv', '.avi', '.mkv', '.mov', '.rmvb', '.3gp']
        for fmt in video_formats:
            if fmt in url.lower():
                return True
        return False

    def manualVideoCheck(self):
        """Whether videos need a manual check."""
        return False

    # Helper - HTTP requests
    def fetch(self, url, headers=None, data=None, method="GET"):
        """Unified HTTP request helper; returns the response or None on failure."""
        try:
            if headers is None:
                headers = self.headers.copy()

            if method.upper() == "GET":
                response = requests.get(url, headers=headers, params=data, timeout=10, verify=False)
            else:  # POST
                response = requests.post(url, headers=headers, data=data, timeout=10, verify=False)

            response.raise_for_status()
            response.encoding = response.apparent_encoding or 'utf-8'
            return response
        except Exception as e:
            self.log(f"Request failed: {url}, error: {str(e)}", "ERROR")
            return None

    # Cache helpers
    def getCache(self, key, timeout=3600):
        """Return a cached value, or None if missing or expired."""
        if key in self.cache and key in self.cache_timeout:
            if time.time() < self.cache_timeout[key]:
                return self.cache[key]
            else:
                del self.cache[key]
                del self.cache_timeout[key]
        return None

    def setCache(self, key, value, timeout=3600):
        """Store a value that expires `timeout` seconds from now."""
        self.cache[key] = value
        self.cache_timeout[key] = time.time() + timeout

    # Logging helper
    def log(self, msg, level='INFO'):
        """Print a timestamped log line, filtered by level."""
        levels = {
            'DEBUG': 0,
            'INFO': 1,
            'WARNING': 2,
            'ERROR': 3
        }

        current_level = 'INFO'  # set to 'DEBUG' for more verbose output

        if levels.get(level, 4) >= levels.get(current_level, 1):
            print(f"[{level}] {time.strftime('%Y-%m-%d %H:%M:%S')} - {msg}")

    # Helper - extract the video ID from a URL
    def extractVodId(self, url):
        """Extract the numeric video ID from a play/detail URL.

        Example path: /index.php/vod/play/id/9024/sid/1/nid/1.html -> "9024"
        """
        match = re.search(r'/id/(\d+)/', url)
        if match:
            return match.group(1)
        return ""

    # Helper - extract category tags from page text
    def extractCategories(self, text):
        """Split a tag string such as "男频,逆袭,亲情,短剧" into a list,
        dropping the generic "短剧" (short drama) tag."""
        cats = []
        if "," in text:
            parts = text.split(",")
            for part in parts:
                part = part.strip()
                if part and part != "短剧":
                    cats.append(part)
        return cats

    # Main interface implementation
    def homeContent(self, filter):
        """Return home-page categories and recommended videos."""
        result = {}
        classes = []

        # Try the cache first
        cache_key = 'home_classes'
        cached_classes = self.getCache(cache_key)
        if cached_classes:
            classes = cached_classes
        else:
            # Fall back to the predefined category map
            for k, v in self.cateManual.items():
                classes.append({
                    'type_id': v,  # the full URL path serves as type_id
                    'type_name': k
                })

            # Cache for 24 hours
            self.setCache(cache_key, classes, 24 * 3600)

        result['class'] = classes

        # Home-page recommended videos
        videos = self.homeVideoContent().get('list', [])
        result['list'] = videos

        return result

    def homeVideoContent(self):
        """Return the recommended videos from the home page."""
        result = {'list': []}
        videos = []

        # Try the cache first
        cache_key = 'home_videos'
        cached_videos = self.getCache(cache_key)
        if cached_videos:
            return {'list': cached_videos}

        try:
            response = self.fetch(self.siteUrl)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')

                # Locate the "最新更新" (latest updates) section
                latest_section = soup.find('h2', string=lambda t: t and '最新更新' in t)
                if latest_section:
                    container = latest_section.parent
                    if container:
                        # Collect all li.item entries
                        items = container.find_all('li', class_='item')

                        for item in items:
                            try:
                                # Title
                                title_link = item.find('h3')
                                if not title_link:
                                    continue

                                title = title_link.text.strip()

                                # The first link is the detail-page link
                                link_tag = item.find('a')
                                if not link_tag:
                                    continue

                                link = link_tag.get('href', '')
                                if not link.startswith('http'):
                                    link = urllib.parse.urljoin(self.siteUrl, link)

                                # Video ID
                                vid = self.extractVodId(link)
                                if not vid:
                                    continue

                                # Cover image (src, falling back to lazy-load data-src)
                                img_tag = item.find('img')
                                img_url = ""
                                if img_tag:
                                    img_url = img_tag.get('src') or img_tag.get('data-src', '')
                                    if img_url and not img_url.startswith('http'):
                                        img_url = urllib.parse.urljoin(self.siteUrl, img_url)

                                # Remarks (status text)
                                remarks = ""
                                remarks_tag = item.find('span', class_='remarks')
                                if remarks_tag:
                                    remarks = remarks_tag.text.strip()

                                # Tags
                                tags = ""
                                tags_tag = item.find('span', class_='tags')
                                if tags_tag:
                                    tags = tags_tag.text.strip()

                                # Merge remarks and tags
                                if remarks and tags:
                                    remarks = f"{remarks} | {tags}"
                                elif tags:
                                    remarks = tags

                                # Build the video entry
                                videos.append({
                                    'vod_id': vid,
                                    'vod_name': title,
                                    'vod_pic': img_url,
                                    'vod_remarks': remarks
                                })
                            except Exception as e:
                                self.log(f"Error while parsing a video item: {str(e)}", "ERROR")
                                continue

                # Cache for 1 hour
                self.setCache(cache_key, videos, 3600)
        except Exception as e:
            self.log(f"Error while fetching home-page videos: {str(e)}", "ERROR")

        result['list'] = videos
        return result

    def categoryContent(self, tid, pg, filter, extend):
        """Return the video list for a category page."""
        result = {}
        videos = []

        # Page number
        if pg is None:
            pg = 1
        else:
            pg = int(pg)

        # Resolve the category URL. tid is normally the full URL path;
        # otherwise treat it as a legacy category name and look it up.
        if tid.startswith("/"):
            category_url = tid
        else:
            category_url = self.cateManual.get(tid, "")
            if not category_url:
                self.log(f"No URL found for category ID: {tid}", "ERROR")
                result['list'] = []
                result['page'] = pg
                result['pagecount'] = 1
                result['limit'] = 0
                result['total'] = 0
                return result

        # Apply the page number, e.g.
        # /index.php/vod/show/class/男频/id/1.html -> .../id/1/page/2.html
        if pg > 1:
            if "html" in category_url:
                category_url = category_url.replace(".html", f"/page/{pg}.html")
            else:
                category_url = f"{category_url}/page/{pg}.html"

        full_url = urllib.parse.urljoin(self.siteUrl, category_url)

        # Pagination defaults (also used if the request fails)
        total = 0
        pagecount = 1
        limit = 20

        # Fetch the category page
        try:
            response = self.fetch(full_url)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')

                # Video items; selector matches the site's list markup
                items = soup.find_all('li', class_='item')

                for item in items:
                    try:
                        # Title
                        title_tag = item.find('h3')
                        if not title_tag:
                            continue

                        title = title_tag.text.strip()

                        # Detail-page link
                        link_tag = item.find('a')
                        if not link_tag:
                            continue

                        link = link_tag.get('href', '')
                        if not link.startswith('http'):
                            link = urllib.parse.urljoin(self.siteUrl, link)

                        # Video ID
                        vid = self.extractVodId(link)
                        if not vid:
                            continue

                        # Cover image (src, falling back to lazy-load data-src)
                        img_tag = item.find('img')
                        img_url = ""
                        if img_tag:
                            img_url = img_tag.get('src') or img_tag.get('data-src', '')
                            if img_url and not img_url.startswith('http'):
                                img_url = urllib.parse.urljoin(self.siteUrl, img_url)

                        # Remarks (status text)
                        remarks = ""
                        remarks_tag = item.find('span', class_='remarks')
                        if remarks_tag:
                            remarks = remarks_tag.text.strip()

                        # Tags
                        tags = ""
                        tags_tag = item.find('span', class_='tags')
                        if tags_tag:
                            tags = tags_tag.text.strip()

                        # Merge remarks and tags
                        if remarks and tags:
                            remarks = f"{remarks} | {tags}"
                        elif tags:
                            remarks = tags

                        # Build the video entry
                        videos.append({
                            'vod_id': vid,
                            'vod_name': title,
                            'vod_pic': img_url,
                            'vod_remarks': remarks
                        })
                    except Exception as e:
                        self.log(f"Error while parsing a category video item: {str(e)}", "ERROR")
                        continue

                total = len(videos)

                # Look for pagination links and take the highest page number
                pagination = soup.find('ul', class_='page')
                if pagination:
                    last_page_links = pagination.find_all('a')
                    for link in last_page_links:
                        page_text = link.text.strip()
                        if page_text.isdigit():
                            pagecount = max(pagecount, int(page_text))
        except Exception as e:
            self.log(f"Error while fetching category content: {str(e)}", "ERROR")

        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = pagecount
        result['limit'] = limit
        result['total'] = total

        return result

    def detailContent(self, ids):
        """Return detail info for a video."""
        result = {}

        if not ids or len(ids) == 0:
            return result

        # Video ID
        vid = ids[0]

        # Detail info lives on the play page
        play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/1.html"

        try:
            response = self.fetch(play_url)
            if not response or response.status_code != 200:
                return result

            html = response.text
            soup = BeautifulSoup(html, 'html.parser')

            # Basic info
            # Title
            title = ""
            title_tag = soup.find('h1', class_='items-title')
            if title_tag:
                title = title_tag.text.strip()

            # Cover image
            pic = ""
            pic_tag = soup.find('img', class_='thumb')
            if pic_tag:
                pic = pic_tag.get('src', '')
                if pic and not pic.startswith('http'):
                    pic = urllib.parse.urljoin(self.siteUrl, pic)

            # Synopsis
            desc = ""
            desc_tag = soup.find('div', class_='text-content')
            if desc_tag:
                desc = desc_tag.text.strip()

            # Tags / categories
            tags = []
            tags_container = soup.find('span', class_='items-tags')
            if tags_container:
                tag_links = tags_container.find_all('a')
                for tag in tag_links:
                    tag_text = tag.text.strip()
                    if tag_text:
                        tags.append(tag_text)

            # Play list
            play_from = "偷乐短剧"
            play_list = []

            # Episode list lives in a swiper container
            play_area = soup.find('div', class_='swiper-wrapper')
            if play_area:
                episode_links = play_area.find_all('a')
                for ep in episode_links:
                    ep_title = ep.text.strip()
                    ep_url = ep.get('href', '')

                    if ep_url:
                        # Use the absolute URL itself as the episode ID
                        if not ep_url.startswith('http'):
                            ep_url = urllib.parse.urljoin(self.siteUrl, ep_url)

                        # Normalize the episode label
                        ep_num = ep_title
                        if ep_num.isdigit():
                            ep_num = f"第{ep_num}集"

                        play_list.append(f"{ep_num}${ep_url}")

            # Fallback 1: a single play button
            if not play_list:
                play_btn = soup.find('a', class_='btn-play')
                if play_btn:
                    btn_url = play_btn.get('href', '')
                    if btn_url:
                        if not btn_url.startswith('http'):
                            btn_url = urllib.parse.urljoin(self.siteUrl, btn_url)

                        play_list.append(f"播放${btn_url}")

            # Fallback 2: use the play-page URL itself
            if not play_list:
                play_list.append(f"播放${play_url}")

            # Extra metadata (director, cast, ...)
            director = ""
            actor = ""
            year = ""
            area = ""
            remarks = ""

            meta_items = soup.find_all('div', class_='meta-item')
            for item in meta_items:
                item_title = item.find('span', class_='item-title')
                item_content = item.find('span', class_='item-content')

                if item_title and item_content:
                    title_text = item_title.text.strip()
                    content_text = item_content.text.strip()

                    if "导演" in title_text:
                        director = content_text
                    elif "主演" in title_text:
                        actor = content_text
                    elif "年份" in title_text:
                        year = content_text
                    elif "地区" in title_text:
                        area = content_text
                    elif "简介" in title_text:
                        if not desc:
                            desc = content_text
                    elif "状态" in title_text:
                        remarks = content_text

            # Fallback for remarks if not present in the meta items
            if not remarks:
                remarks_tag = soup.find('span', class_='remarks')
                if remarks_tag:
                    remarks = remarks_tag.text.strip()

            # Standard vod structure
            vod = {
                "vod_id": vid,
                "vod_name": title,
                "vod_pic": pic,
                "vod_year": year,
                "vod_area": area,
                "vod_remarks": remarks,
                "vod_actor": actor,
                "vod_director": director,
                "vod_content": desc,
                "type_name": ",".join(tags),
                "vod_play_from": play_from,
                "vod_play_url": "#".join(play_list)
            }

            result = {
                'list': [vod]
            }
        except Exception as e:
            self.log(f"Error while fetching detail content: {str(e)}", "ERROR")

        return result

    def searchContent(self, key, quick, pg=1):
        """Search for videos by keyword (fetches the first result page only)."""
        result = {}
        videos = []

        # Search URL and query parameters
        search_url = f"{self.siteUrl}/index.php/vod/search.html"
        params = {"wd": key}

        try:
            response = self.fetch(search_url, data=params)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')

                # Result items
                search_items = soup.find_all('li', class_='item')

                for item in search_items:
                    try:
                        # Title
                        title_tag = item.find('h3')
                        if not title_tag:
                            continue

                        title = title_tag.text.strip()

                        # Detail-page link
                        link_tag = item.find('a')
                        if not link_tag:
                            continue

                        link = link_tag.get('href', '')
                        if not link.startswith('http'):
                            link = urllib.parse.urljoin(self.siteUrl, link)

                        # Video ID
                        vid = self.extractVodId(link)
                        if not vid:
                            continue

                        # Cover image (src, falling back to lazy-load data-src)
                        img_tag = item.find('img')
                        img_url = ""
                        if img_tag:
                            img_url = img_tag.get('src') or img_tag.get('data-src', '')
                            if img_url and not img_url.startswith('http'):
                                img_url = urllib.parse.urljoin(self.siteUrl, img_url)

                        # Remarks (status text)
                        remarks = ""
                        remarks_tag = item.find('span', class_='remarks')
                        if remarks_tag:
                            remarks = remarks_tag.text.strip()

                        # Tags
                        tags = ""
                        tags_tag = item.find('span', class_='tags')
                        if tags_tag:
                            tags = tags_tag.text.strip()

                        # Merge remarks and tags
                        if remarks and tags:
                            remarks = f"{remarks} | {tags}"
                        elif tags:
                            remarks = tags

                        # Build the video entry
                        videos.append({
                            'vod_id': vid,
                            'vod_name': title,
                            'vod_pic': img_url,
                            'vod_remarks': remarks
                        })
                    except Exception as e:
                        self.log(f"Error while parsing a search result: {str(e)}", "ERROR")
                        continue
        except Exception as e:
            self.log(f"Search failed: {str(e)}", "ERROR")

        result['list'] = videos
        return result

    def searchContentPage(self, key, quick, pg=1):
        return self.searchContent(key, quick, pg)

    def playerContent(self, flag, id, vipFlags):
        """Resolve the actual playable URL for an episode."""
        result = {}

        try:
            # Already a direct video URL?
            if self.isVideoFormat(id):
                result["parse"] = 0
                result["url"] = id
                result["playUrl"] = ""
                result["header"] = json.dumps(self.headers)
                return result

            # Full page URL?
            if id.startswith(('http://', 'https://')):
                play_url = id
            # Relative path?
            elif id.startswith('/'):
                play_url = urllib.parse.urljoin(self.siteUrl, id)
            # Otherwise assume a video ID and build the play-page URL
            else:
                # "videoID_episode" format?
                parts = id.split('_')
                if len(parts) > 1 and parts[0].isdigit():
                    vid = parts[0]
                    nid = parts[1]
                    play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/{nid}.html"
                else:
                    # Treat the whole string as the video ID
                    play_url = f"{self.siteUrl}/index.php/vod/play/id/{id}/sid/1/nid/1.html"

            # Fetch the play page and extract the real video address
            try:
                self.log(f"Parsing play page: {play_url}")
                response = self.fetch(play_url)
                if response and response.status_code == 200:
                    html = response.text

                    # Look for the player_aaaa JS variable
                    player_match = re.search(r'var\s+player_aaaa\s*=\s*({.*?});', html, re.DOTALL)
                    if player_match:
                        try:
                            player_data = json.loads(player_match.group(1))
                            if 'url' in player_data:
                                video_url = player_data['url']
                                if not video_url.startswith('http'):
                                    video_url = urllib.parse.urljoin(self.siteUrl, video_url)

                                self.log(f"Video URL from player_aaaa: {video_url}")
                                result["parse"] = 0
                                result["url"] = video_url
                                result["playUrl"] = ""
                                result["header"] = json.dumps(self.headers)
                                return result
                        except json.JSONDecodeError as e:
                            self.log(f"Failed to parse player_aaaa JSON: {str(e)}", "ERROR")

                    # player_aaaa failed; try other extraction strategies
                    # 1. <video> tag
                    video_match = re.search(r'<video[^>]*src=["\'](.*?)["\']', html)
                    if video_match:
                        video_url = video_match.group(1)
                        if not video_url.startswith('http'):
                            video_url = urllib.parse.urljoin(self.siteUrl, video_url)

                        self.log(f"Video URL from <video> tag: {video_url}")
                        result["parse"] = 0
                        result["url"] = video_url
                        result["playUrl"] = ""
                        result["header"] = json.dumps(self.headers)
                        return result

                    # 2. iframe
                    iframe_match = re.search(r'<iframe[^>]*src=["\'](.*?)["\']', html)
                    if iframe_match:
                        iframe_url = iframe_match.group(1)
                        if not iframe_url.startswith('http'):
                            iframe_url = urllib.parse.urljoin(self.siteUrl, iframe_url)

                        self.log(f"Found iframe, resolving: {iframe_url}")
                        iframe_response = self.fetch(iframe_url)
                        if iframe_response and iframe_response.status_code == 200:
                            iframe_html = iframe_response.text

                            # Look for a video URL inside the iframe
                            iframe_video_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', iframe_html)
                            if iframe_video_match:
                                video_url = iframe_video_match.group(1)

                                self.log(f"Video URL from iframe: {video_url}")
                                result["parse"] = 0
                                result["url"] = video_url
                                result["playUrl"] = ""
                                result["header"] = json.dumps({
                                    "User-Agent": self.headers["User-Agent"],
                                    "Referer": iframe_url
                                })
                                return result

                    # 3. Any video-looking URL anywhere in the page
                    url_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', html)
                    if url_match:
                        video_url = url_match.group(1)

                        self.log(f"Possible video URL: {video_url}")
                        result["parse"] = 0
                        result["url"] = video_url
                        result["playUrl"] = ""
                        result["header"] = json.dumps(self.headers)
                        return result
            except Exception as e:
                self.log(f"Error while resolving the play URL: {str(e)}", "ERROR")

            # All strategies failed; hand the page URL to an external parser
            self.log("No direct video URL found; external parsing required", "WARNING")
            result["parse"] = 1  # 1 = needs external parsing
            result["url"] = play_url  # return the play-page URL
            result["playUrl"] = ""
            result["header"] = json.dumps(self.headers)

        except Exception as e:
            self.log(f"Error while building player content: {str(e)}", "ERROR")

        return result

    def localProxy(self, param):
        """Local proxy hook; not used by this spider."""
        return [404, "text/plain", {}, "Not Found"]
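
# Minimal smoke test for local debugging; an addition, not part of the
# framework contract. It assumes the site is reachable and that homeContent /
# categoryContent / searchContent keep the dict shapes built above.
if __name__ == "__main__":
    spider = Spider()
    spider.init()

    home = spider.homeContent(False)
    print(f"categories: {len(home.get('class', []))}, home videos: {len(home.get('list', []))}")

    if home.get('class'):
        first = home['class'][0]
        cat = spider.categoryContent(first['type_id'], 1, False, {})
        print(f"category '{first['type_name']}': {len(cat.get('list', []))} items, pagecount={cat.get('pagecount')}")

    results = spider.searchContent("逆袭", False)
    print(f"search results: {len(results.get('list', []))}")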