up
Liu 2025-04-18 01:02:33 +08:00
parent 6cbb92699a
commit d1d1f8d50e
8 changed files with 1410 additions and 1552 deletions


@@ -1,11 +1,9 @@
央视,#genre#
CCTV1,http://home.kuaidi521.com:35455/gaoma/cctv1.m3u8
CCTV1,http://xiaoya.crccxw.top:35455/gaoma/cctv1.m3u8
CCTV1,http://58.220.219.14:9901/tsfile/live/0001_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV1,http://175.31.21.146:4480/hls/1/index.m3u8
CCTV1,http://58.220.211.90:352/tsfile/live/0001_1.m3u8?key=txiptv&playlive=1&authid$LR•IPV4『线路274』
CCTV1,http://58.220.211.90:352/tsfile/live/0001_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路275』
CCTV1,http://3501776.xyz:35455/gaoma/cctv1.m3u8
CCTV1,http://113.140.12.230:8888/newlive/live/hls/2/live.m3u8
CCTV1,http://222.219.183.103:8089/hls/1/index.m3u8
CCTV1,http://58.210.168.86:10800/newlive/live/hls/1/live.m3u8
@@ -13,7 +11,6 @@ CCTV2,http://222.169.85.8:9901/tsfile/live/0002_1.m3u8?key=txiptv&playlive=1&aut
CCTV2,http://39.164.160.249:9901/tsfile/live/0002_1.m3u8
CCTV2,http://175.31.21.146:4480/hls/2/index.m3u8
CCTV2,http://116.128.243.121:85/tsfile/live/0002_1.m3u8?key=txiptv&playlive=0&authid=0
CCTV2,http://3501776.xyz:35455/gaoma/cctv2.m3u8
CCTV2,http://113.140.12.230:8888/newlive/live/hls/3/live.m3u8
CCTV2,http://60.223.224.176:8888/newlive/live/hls/3/live.m3u8
CCTV3,http://222.134.19.31:352/tsfile/live/0003_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路247』
@@ -23,7 +20,6 @@ CCTV3,http://113.140.12.230:8888/newlive/live/hls/4/live.m3u8
CCTV3,http://222.169.85.8:9901/tsfile/live/0003_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV3,http://222.169.85.8:9901/tsfile/live/0003_1.m3u8
CCTV3,http://112.46.85.60:8009/hls/3/index.m3u8
CCTV3,http://3501776.xyz:35455/gaoma/cctv3.m3u8
CCTV3,http://218.29.168.146:352/tsfile/live/0003_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV3,http://58.220.219.14:9901/tsfile/live/0003_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV4,http://113.140.12.230:8888/newlive/live/hls/5/live.m3u8
@@ -34,14 +30,11 @@ CCTV5,http://60.223.224.176:8888/newlive/live/hls/6/live.m3u8
CCTV5,http://113.140.12.230:8888/newlive/live/hls/6/live.m3u8
CCTV5,http://116.9.204.242:9901/tsfile/live/0005_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路177』
CCTV5,http://61.133.10.250:352/tsfile/live/0005_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路269』
CCTV5,http://home.kuaidi521.com:35455/gaoma/cctv5.m3u8
CCTV5,http://116.9.204.242:9901/tsfile/live/0005_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV5,http://116.9.204.242:9901/tsfile/live/0005_1.m3u8
CCTV5,http://xiaoya.crccxw.top:35455/gaoma/cctv5.m3u8
CCTV5+,http://home.kuaidi521.com:35455/gaoma/cctv5p.m3u8
CCTV5+,http://xiaoya.crccxw.top:35455/gaoma/cctv5p.m3u8
CCTV5+,http://222.219.183.103:8089/hls/19/index.m3u8
CCTV5+,http://182.37.169.94:352/tsfile/live/0016_1.m3u8
CCTV6,http://222.134.19.31:352/tsfile/live/0006_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV6,http://39.170.52.236:808/hls/6/index.m3u8
CCTV6,http://113.140.12.230:8888/newlive/live/hls/7/live.m3u8
@@ -53,12 +46,9 @@ CCTV7,http://60.223.224.176:8888/newlive/live/hls/9/live.m3u8
CCTV7,https://live.junhao.mil.cn/rmt9502/3e6e7c5477314ec38f5e58e2a5c70024.m3u8?txSecret=57978fafb9c19880afa5c8687e8241d8&txTime=7A42BAFF
CCTV7,http://live.junhao.mil.cn/rmt9502/3e6e7c5477314ec38f5e58e2a5c70024.m3u8?txSecret=57978fafb9c19880afa5c8687e8241d8&txTime=7A42BAFF
CCTV7,http://xiaoya.crccxw.top:35455/gaoma/cctv7.m3u8
CCTV7,http://home.kuaidi521.com:35455/gaoma/cctv7.m3u8
CCTV7,http://101.19.84.8:9901/tsfile/live/0007_1.m3u8
CCTV7,http://222.219.183.103:8089/hls/7/index.m3u8
CCTV8,http://222.134.19.31:352/tsfile/live/0008_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV8,http://3501776.xyz:35455/gaoma/cctv8.m3u8
CCTV8,http://home.kuaidi521.com:35455/gaoma/cctv8.m3u8
CCTV8,http://112.46.85.60:8009/hls/8/index.m3u8
CCTV8,http://101.19.84.8:9901/tsfile/live/0008_1.m3u8
CCTV8,http://113.140.12.230:8888/newlive/live/hls/9/live.m3u8
@@ -66,23 +56,16 @@ CCTV8,http://xiaoya.crccxw.top:35455/gaoma/cctv8.m3u8
CCTV8,http://60.223.224.176:8888/newlive/live/hls/10/live.m3u8?
CCTV8,http://61.133.10.250:352/tsfile/live/0008_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV9,http://180.213.174.225:9901/tsfile/live/0009_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV9,http://3501776.xyz:35455/gaoma/cctv9.m3u8
CCTV9,http://xiaoya.crccxw.top:35455/gaoma/cctv9.m3u8
CCTV9,http://home.kuaidi521.com:35455/gaoma/cctv9.m3u8
CCTV9,http://60.223.224.176:8888/newlive/live/hls/11/live.m3u8?
CCTV9,http://222.219.183.103:8089/hls/9/index.m3u8
CCTV10,http://39.164.160.249:9901/tsfile/live/0010_1.m3u8
CCTV10,http://3501776.xyz:35455/gaoma/cctv10.m3u8
CCTV10,http://xiaoya.crccxw.top:35455/gaoma/cctv10.m3u8
CCTV10,http://113.140.12.230:8888/newlive/live/hls/11/live.m3u8
CCTV10,http://222.169.85.8:9901/tsfile/live/0010_1.m3u8
CCTV10,http://222.219.183.103:8089/hls/10/index.m3u8
CCTV10,http://60.223.224.176:8888/newlive/live/hls/12/live.m3u8
CCTV10,http://182.37.169.94:352/tsfile/live/0010_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV11,http://39.164.160.249:9901/tsfile/live/0011_1.m3u8
CCTV11,http://home.kuaidi521.com:35455/gaoma/cctv11.m3u8
CCTV12,http://home.kuaidi521.com:35455/gaoma/cctv12.m3u8
CCTV12,http://39.164.160.249:9901/tsfile/live/0012_1.m3u8
CCTV12,http://xiaoya.crccxw.top:35455/gaoma/cctv12.m3u8
CCTV13,http://61.163.181.78:9901/tsfile/live/0013_1.m3u8
CCTV13,https://event.pull.hebtv.com/jishi/cp1.m3u8
@@ -90,12 +73,9 @@ CCTV13,http://ali-m-l.cztv.com/channels/lantian/channel21/1080p.m3u8
CCTV13,http://183.215.134.239:19901/tsfile/live/0013_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV13,http://39.164.160.249:9901/tsfile/live/0013_1.m3u8
CCTV14,https://event.pull.hebtv.com/jishi/cp2.m3u8
CCTV14,http://3501776.xyz:35455/gaoma/cctv14.m3u8
CCTV14,http://39.164.160.249:9901/tsfile/live/0014_1.m3u8
CCTV14,http://58.17.48.228:808/hls/14/index.m3u8
CCTV14,http://222.173.108.238:352/tsfile/live/0014_1.m3u8?key=txiptv&playlive=1&authid=0
CCTV15,http://39.164.160.249:9901/tsfile/live/0015_1.m3u8
CCTV15,http://home.kuaidi521.com:35455/gaoma/cctv15.m3u8
CCTV16,http://xiaoya.crccxw.top:35455/gaoma/cctv16.m3u8
CCTV17,http://xiaoya.crccxw.top:35455/gaoma/cctv17.m3u8
CCTV风云剧场,http://dassby.qqff.top:99/live/风云剧场/index.m3u8
@@ -118,7 +98,6 @@ CCTV15,http://61.156.228.12:8154/tsfile/live/0015_1.m3u8
安多卫视,https://liveout.xntv.tv/a65jur/96iln2.m3u8
安多卫视,https://livecdn.dmqhyadmin.com/hls/xjmenyuan.m3u8
安徽卫视,http://115.149.139.141:10000/tsfile/live/1028_1.m3u8?key=txiptv&playlive=0&authid=0
安徽卫视,http://39.164.160.249:9901/tsfile/live/0130_1.m3u8
北京卫视,http://183.215.134.239:19901/tsfile/live/0122_1.m3u8?key=txiptv&playlive=1&authid=0
北京卫视,http://115.149.139.141:10000/tsfile/live/1022_1.m3u8?key=txiptv&playlive=0&authid=0
北京卫视,http://58.220.219.14:9901/tsfile/live/0122_1.m3u8?key=txiptv&playlive=1&authid=0
@@ -131,37 +110,29 @@ CCTV15,http://61.156.228.12:8154/tsfile/live/0015_1.m3u8
重庆卫视,http://222.173.108.238:352/tsfile/live/1016_1.m3u8?key=txiptv&playlive=1&authid=0
大湾区卫视,http://222.128.55.152:9080/live/dwq.m3u8
东方卫视,http://183.215.134.239:19901/tsfile/live/0107_1.m3u8?key=txiptv&playlive=1&authid=0
东方卫视,http://39.164.160.249:9901/tsfile/live/0107_1.m3u8
东南卫视,http://120.76.248.139/live/bfgd/4200000483.m3u8
东南卫视,http://115.149.139.141:10001/tsfile/live/1035_1.m3u8?blog.ntnas.top
东南卫视,http://220.164.192.48:50085/tsfile/live/0120_1.m3u8
甘肃卫视,http://live.zohi.tv/video/s10001-fztv-3/index.m3u8
广东卫视,http://183.215.134.239:19901/tsfile/live/0125_1.m3u8?key=txiptv&playlive=1&authid=0
广东卫视,http://115.149.139.141:10001/tsfile/live/1029_1.m3u8?blog.ntnas.top
广东卫视,http://222.173.108.238:352/tsfile/live/0125_1.m3u8?key=txiptv&playlive=1&authid=0
广东卫视,http://39.164.160.249:9901/tsfile/live/0125_1.m3u8
广西卫视,http://live.cztv.cc:85/live/ggpd.m3u8
广西卫视,http://115.149.139.141:10000/tsfile/live/1037_1.m3u8?key=txiptv&playlive=0&authid=0
广西卫视,http://220.164.192.48:50085/tsfile/live/0112_1.m3u8
广西卫视,http://115.149.139.141:10001/tsfile/live/1037_1.m3u8?blog.ntnas.top
广西卫视,http://116.9.204.242:9901/tsfile/live/0113_1.m3u8?key=txiptv&playlive=1&authid=0
广西卫视,http://116.9.204.242:9901/tsfile/live/0113_1.m3u8
贵州卫视,http://39.164.160.249:9901/tsfile/live/0120_1.m3u8
河北卫视,http://116.131.190.210:352/tsfile/live/0117_1.m3u8?key=txiptv&playlive=1&authid=0
河北卫视,http://60.8.49.38:352/tsfile/live/0117_1.m3u8?key=txiptv&playlive=1&authid=0
河南卫视,http://39.164.160.249:9901/tsfile/live/0139_1.m3u8
黑龙江卫视,http://115.149.139.141:10000/tsfile/live/1030_1.m3u8?key=txiptv&playlive=0&authid=0
黑龙江卫视,http://39.164.160.249:9901/tsfile/live/0143_1.m3u8
湖北卫视,http://115.149.139.141:10001/tsfile/live/1027_1.m3u8
湖南卫视,http://183.215.134.239:19901/tsfile/live/0128_1.m3u8?key=txiptv&playlive=1&authid=0
湖南卫视,http://39.164.160.249:9901/tsfile/live/0128_1.m3u8
湖南卫视,http://60.223.224.176:8888/newlive/live/hls/22/live.m3u8
吉林卫视,http://220.164.192.48:50085/tsfile/live/0118_1.m3u8
江西卫视,http://120.76.248.139/live/bfgd/4200000098.m3u8
江西卫视,http://183.215.134.239:19901/tsfile/live/0138_1.m3u8?key=txiptv&playlive=1&authid=0
江西卫视,http://222.173.108.238:352/tsfile/live/1010_1.m3u8?key=txiptv&playlive=1&authid=0
辽宁卫视,http://222.173.108.238:352/tsfile/live/1008_1.m3u8?key=txiptv&playlive=1&authid=0
辽宁卫视,http://39.164.160.249:9901/tsfile/live/0121_1.m3u8
辽宁卫视,http://115.149.139.141:10001/tsfile/live/1028_1.m3u8
辽宁卫视,http://115.149.139.141:10001/tsfile/live/1028_1.m3u8?blog.ntnas.top
内蒙古蒙语卫视,https://livestream-bt.nmtv.cn/nmtv/2315general.m3u8?txSecret=4971666599ef9411629213c9a300bf66&txTime=771EF880
@@ -182,7 +153,6 @@ CCTV15,http://61.156.228.12:8154/tsfile/live/0015_1.m3u8
亚洲卫视,https://p2hs.vzan.com/slowlive/821481626725612419/live.m3u8
延边卫视,http://l.cztvcloud.com/channels/lantian/SXxinchang2/720p.m3u8
延边卫视,http://l.cztvcloud.com/channels/lantian/SXxinchang2/720p.m3u8$LR•IPV4『线路20』
云南卫视,http://39.164.160.249:9901/tsfile/live/0119_1.m3u8
浙江卫视,https://ali-m-l.cztv.com/channels/lantian/channel001/1080p.m3u8?
浙江卫视,http://ali-m-l.cztv.com:80/channels/lantian/channel001/1080p.m3u8
浙江卫视,https://ali-m-l.cztv.com/channels/lantian/channel001/1080p.m3u8
@@ -194,7 +164,6 @@ CCTV15,http://61.156.228.12:8154/tsfile/live/0015_1.m3u8
浙江卫视,http://wouu.net:9977/udp/239.93.0.124:5140$LR•IPV4『线路157』
北京卫视,http://61.156.228.12:8154/tsfile/live/0122_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路83』
北京卫视,http://123.129.70.178:9901/tsfile/live/0122_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路88』
江苏卫视,http://124.128.73.58:9901/tsfile/live/0127_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路95』
安徽卫视,http://123.129.70.178:9901/tsfile/live/0130_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路73』
安徽卫视,http://61.136.172.236:9901/tsfile/live/0130_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路75』
安徽卫视,http://61.156.228.12:8154/tsfile/live/0130_1.m3u8?key=txiptv&playlive=1&authid=0$LR•IPV4『线路76』
@@ -292,7 +261,6 @@ CGTN阿语,https://0472.org/hls/cgtna.m3u8
少儿频道,#genre#
熊大熊二,https://newcntv.qcloudcdn.com/asp/hls/1200/0303000a/3/default/1733da751de64e6e910abda889d87a26/1200.m3u8
福州少儿,http://live.zohi.tv/video/s10001-fztv-4/index.m3u8
武汉少儿,http://stream.appwuhan.com/7tzb/sd/live.m3u8
GOOD卡通,https://dqhxk7sbp7xog.cloudfront.net/osmflivech45.m3u8
倒霉特熊,https://newcntv.qcloudcdn.com/asp/hls/1200/0303000a/3/default/87f87ba569c147e3805f80e4844d2de9/1200.m3u8
雲朵妈妈,https://newcntv.qcloudcdn.com/asp/hls/1200/0303000a/3/default/d8ad434c6b08421a927557a4d98da65c/1200.m3u8
@@ -314,7 +282,6 @@ key=0b292c335e5837e947f9409c59228f3e6
新昌休闲影视,http://l.cztvcloud.com/channels/lantian/SXxinchang2/720p.m3u8
石河子影视文体,http://124.88.144.73:1935/live/jjsh/HasBahCa.m3u8
烟台影视,http://live.yantaitv.cn/live/e9f1d9045d474c31884d13fa4ffbbd16/a4a9b3704d854756907845107cc56129-1.m3u8
影视频道,http://112.30.194.221:20080/live/d18ff95cb1fb4bbcb56215e189fc12be/hls.m3u8
美丽中国说,http://newcntv.qcloudcdn.com/asp/hls/4000/0303000a/3/default/8cdd688b04e4495ba1635e218419d6f9/4000.m3u8

Binary image file changed (size: 3.5 KiB → 2.9 KiB).


@@ -1,25 +0,0 @@
{
"请求头": "User-Agent$MOBILE_UA",
"主页url": "https://www.aiwodj.com",
"数组": "lazy lazyloaded&&</a[替换:amp;>>]",
"图片": "data-src=\"&&\"",
"标题": "title=\"&&\"",
"链接": "href=\"&&\"",
"副标题": "+module-item-caption\">&&</div",
"简介": "description\" content=\"&&,这里",
"线路数组": "module-tab-item tab-item&&</div",
"线路标题": "+span>&&</span",
"播放数组": "module-blocklist&&</div>",
"播放列表": "<a&&</a>",
"播放标题": "span>&&</span>",
"播放链接": "href=\"&&\"",
"跳转播放链接": "urlDecode(var player_*\"url\":\"&&\")",
"搜索url": "https://www.aiwodj.com/vodsearch/{wd}----------{pg}---.html",
"搜索数组": "lazy lazyload&&</a[替换:amp;>>]",
"搜索图片": "data-src=\"&&\"",
"搜索标题": "+title=\"&&\"",
"搜索链接": "href=\"&&\"",
"分类url": "https://www.aiwodj.com/vodshow/{cateId}--{by}-{class}-{lang}----{catePg}---{year}.html",
"分类": "穿越&战神&重生&爱情&萌娃&神医&古代&玄幻&言情",
"分类值": "fenle&fenlei2&fenlei3&fenlei4&guda&shenyi&gudai&xuanhuan&yanqing"
}
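For context, the file deleted above is an XBPQ rule set: each value such as data-src="&&" is a cut rule meaning "capture the text between the left-hand fragment and the right-hand fragment" (the [替换:amp;>>] suffix seen on some rules appears to be a post-processing replace directive and is not covered here). A minimal sketch of the basic cut-rule semantics, with a hypothetical helper name and simple non-greedy matching:

import re

def xbpq_cut(html, rule):
    # An XBPQ-style rule is "left&&right": capture whatever sits between them.
    left, right = rule.split("&&", 1)
    return re.findall(re.escape(left) + r"(.*?)" + re.escape(right), html, re.DOTALL)

# e.g. the deleted config's 图片 rule:
print(xbpq_cut('<img data-src="https://x/y.jpg">', 'data-src="&&"'))  # ['https://x/y.jpg']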


@@ -89,6 +89,29 @@
"filterable": 1,
"ext": "http://cmsyt114.lyyytv.cn/api.php/app/"
},
{
"key": "乐享影视",
"name": "乐享|影视",
"type": 3,
"api": "csp_AppYsV2",
"searchable": 1,
"quickSearch": 1,
"filterable": 1,
"ext": "http://114.66.55.28:8818/api.php/app/"
},
{
"key": "金牌影视",
"name": "金牌|影视",
"type": 3,
"api": "./py/金牌影视.py",
"searchable": 1,
"quickSearch": 1,
"filterable": 1,
"playerType": 2,
"ext": {
"site": "https://www.hkybqufgh.com,https://www.sizhengxt.com,https://0996zp.com,https://9zhoukj.com/,https://www.sizhengxt.com,https://www.tjrongze.com,https://www.jiabaide.cn,https://cqzuoer.com"
}
},
{
"key": "哔哩视频",
"name": "哔哩|视频",
@@ -195,17 +218,6 @@
"changeable": 1,
"ext": "./json/lb.json?"
},
{
"key": "小米",
"name": "小米4K弹幕",
"type": 3,
"api": "csp_PanWebShare",
"searchable": 1,
"quickSearch": 1,
"filterable": 1,
"changeable": 1,
"ext": "./json/xm.json?"
},
{
"key": "至臻",
"name": "至臻4K弹幕",
@@ -415,10 +427,23 @@
"key": "河马短剧",
"name": "河马|短剧",
"type": 3,
"api": "csp_AppHMDJ",
"api": "./py/河马短剧.py",
"searchable": 1,
"quickSearch": 0,
"filterable": 0
"changeable": 1,
"quickSearch": 1,
"filterable": 1,
"playerType": 2
},
{
"key": "偷乐短剧",
"name": "偷乐|短剧",
"type": 3,
"api": "./py/偷乐短剧.py",
"searchable": 1,
"changeable": 1,
"quickSearch": 1,
"filterable": 1,
"playerType": 2
},
{
"key": "爱看短剧",
@@ -431,16 +456,6 @@
"filterable": 1,
"playerType": 2
},
{
"key": "爱我短剧",
"name": "爱我|短剧",
"type": 3,
"api": "csp_XBPQ",
"searchable": 1,
"quickSearch": 1,
"filterable": 1,
"ext": "./XBPQ/爱我短剧.json"
},
{
"key": "短剧网",
"name": "短剧网|短剧",
@@ -468,19 +483,6 @@
"filterable": 1,
"ext": "https://cs.hgyx.vip/api2/api.php/app/"
},
{
"key": "金牌影视",
"name": "金牌|影视",
"type": 3,
"api": "./py/金牌影视.py",
"searchable": 1,
"quickSearch": 1,
"filterable": 1,
"playerType": 2,
"ext": {
"site": "https://www.hkybqufgh.com,https://www.sizhengxt.com,https://0996zp.com,https://9zhoukj.com/,https://www.sizhengxt.com,https://www.tjrongze.com,https://www.jiabaide.cn,https://cqzuoer.com"
}
},
{
"key": "猎手影视",
"name": "猎手APP",

File diff suppressed because it is too large.

潇洒/py/偷乐短剧.py Normal file (+790 lines)

@@ -0,0 +1,790 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# 偷乐短剧爬虫
import sys
import json
import re
import time
import urllib.parse
import requests
from bs4 import BeautifulSoup
# 导入基础类
sys.path.append('../../')
try:
from base.spider import Spider
except ImportError:
# 本地调试时的替代实现
class Spider:
def init(self, extend=""):
pass
class Spider(Spider):
def __init__(self):
# 网站主URL
self.siteUrl = "https://www.toule.top"
# 根据网站实际结构,分类链接格式为: /index.php/vod/show/class/分类名/id/1.html
# 分类ID映射 - 从网站中提取的分类
self.cateManual = {
"男频": "/index.php/vod/show/class/%E7%94%B7%E9%A2%91/id/1.html",
"女频": "/index.php/vod/show/class/%E5%A5%B3%E9%A2%91/id/1.html",
"都市": "/index.php/vod/show/class/%E9%83%BD%E5%B8%82/id/1.html",
"赘婿": "/index.php/vod/show/class/%E8%B5%98%E5%A9%BF/id/1.html",
"战神": "/index.php/vod/show/class/%E6%88%98%E7%A5%9E/id/1.html",
"古代言情": "/index.php/vod/show/class/%E5%8F%A4%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
"现代言情": "/index.php/vod/show/class/%E7%8E%B0%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
"历史": "/index.php/vod/show/class/%E5%8E%86%E5%8F%B2/id/1.html",
"玄幻": "/index.php/vod/show/class/%E7%8E%84%E5%B9%BB/id/1.html",
"搞笑": "/index.php/vod/show/class/%E6%90%9E%E7%AC%91/id/1.html",
"甜宠": "/index.php/vod/show/class/%E7%94%9C%E5%AE%A0/id/1.html",
"励志": "/index.php/vod/show/class/%E5%8A%B1%E5%BF%97/id/1.html",
"逆袭": "/index.php/vod/show/class/%E9%80%86%E8%A2%AD/id/1.html",
"穿越": "/index.php/vod/show/class/%E7%A9%BF%E8%B6%8A/id/1.html",
"古装": "/index.php/vod/show/class/%E5%8F%A4%E8%A3%85/id/1.html"
}
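# Note: the cateManual values above are just URL-encoded class names, e.g.
# urllib.parse.quote("男频") == "%E7%94%B7%E9%A2%91", so the table could also
# be generated from the category names themselves.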
# 请求头
self.headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
"Referer": "https://www.toule.top/",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
}
# 缓存
self.cache = {}
self.cache_timeout = {}
def getName(self):
return "偷乐短剧"
def init(self, extend=""):
# 初始化方法,可以留空
return
def isVideoFormat(self, url):
"""判断是否为视频格式"""
video_formats = ['.mp4', '.m3u8', '.ts', '.flv', '.avi', '.mkv', '.mov', '.rmvb', '.3gp']
for format in video_formats:
if format in url.lower():
return True
return False
def manualVideoCheck(self):
"""是否需要手动检查视频"""
return False
# 工具方法 - 网络请求
def fetch(self, url, headers=None, data=None, method="GET"):
"""统一的网络请求方法"""
try:
if headers is None:
headers = self.headers.copy()
if method.upper() == "GET":
response = requests.get(url, headers=headers, params=data, timeout=10, verify=False)
else:  # POST
response = requests.post(url, headers=headers, data=data, timeout=10, verify=False)
response.raise_for_status()
response.encoding = response.apparent_encoding or 'utf-8'
return response
except Exception as e:
self.log(f"请求失败: {url}, 错误: {str(e)}", "ERROR")
return None
# 缓存方法
def getCache(self, key, timeout=3600):
"""获取缓存数据"""
if key in self.cache and key in self.cache_timeout:
if time.time() < self.cache_timeout[key]:
return self.cache[key]
else:
del self.cache[key]
del self.cache_timeout[key]
return None
def setCache(self, key, value, timeout=3600):
"""设置缓存数据"""
self.cache[key] = value
self.cache_timeout[key] = time.time() + timeout
# 日志方法
def log(self, msg, level='INFO'):
"""记录日志"""
levels = {
'DEBUG': 0,
'INFO': 1,
'WARNING': 2,
'ERROR': 3
}
current_level = 'INFO' # 可以设置为DEBUG以获取更多信息
if levels.get(level, 4) >= levels.get(current_level, 1):
print(f"[{level}] {time.strftime('%Y-%m-%d %H:%M:%S')} - {msg}")
# 辅助方法 - 从URL中提取视频ID
def extractVodId(self, url):
"""从URL中提取视频ID"""
# 路径格式: /index.php/vod/play/id/9024/sid/1/nid/1.html
match = re.search(r'/id/(\d+)/', url)
if match:
return match.group(1)
return ""
# 辅助方法 - 从网页内容中提取分类
def extractCategories(self, text):
"""从网页内容中提取分类标签"""
cats = []
# 匹配标签字符串,例如: "男频,逆袭,亲情,短剧"
if "," in text:
parts = text.split(",")
for part in parts:
part = part.strip()
if part and part != "短剧":
cats.append(part)
return cats
# 主要接口实现
def homeContent(self, filter):
"""获取首页分类及内容"""
result = {}
classes = []
# 从缓存获取
cache_key = 'home_classes'
cached_classes = self.getCache(cache_key)
if cached_classes:
classes = cached_classes
else:
# 使用预定义的分类
for k, v in self.cateManual.items():
classes.append({
'type_id': v, # 使用完整URL路径作为type_id
'type_name': k
})
# 保存到缓存
self.setCache(cache_key, classes, 24*3600) # 缓存24小时
result['class'] = classes
# 获取首页推荐视频
videos = self.homeVideoContent().get('list', [])
result['list'] = videos
return result
def homeVideoContent(self):
"""获取首页推荐视频内容"""
result = {'list': []}
videos = []
# 从缓存获取
cache_key = 'home_videos'
cached_videos = self.getCache(cache_key)
if cached_videos:
return {'list': cached_videos}
try:
response = self.fetch(self.siteUrl)
if response and response.status_code == 200:
html = response.text
soup = BeautifulSoup(html, 'html.parser')
# 查找最新更新区域
latest_section = soup.find('h2', text=lambda t: t and '最新更新' in t)
if latest_section:
container = latest_section.parent # 获取容器
if container:
# 查找所有 li.item 元素
items = container.find_all('li', class_='item')
for item in items:
try:
# 获取链接和标题
title_link = item.find('h3')
if not title_link:
continue
title = title_link.text.strip()
# 获取第一个链接作为详情页链接
link_tag = item.find('a')
if not link_tag:
continue
link = link_tag.get('href', '')
if not link.startswith('http'):
link = urllib.parse.urljoin(self.siteUrl, link)
# 提取ID
vid = self.extractVodId(link)
if not vid:
continue
# 获取图片
img_tag = item.find('img')
img_url = ""
if img_tag:
img_url = img_tag.get('src', img_tag.get('data-src', ''))
if img_url and not img_url.startswith('http'):
img_url = urllib.parse.urljoin(self.siteUrl, img_url)
# 获取备注信息
remarks = ""
remarks_tag = item.find('span', class_='remarks')
if remarks_tag:
remarks = remarks_tag.text.strip()
# 获取标签信息
tags = ""
tags_tag = item.find('span', class_='tags')
if tags_tag:
tags = tags_tag.text.strip()
# 合并备注和标签
if remarks and tags:
remarks = f"{remarks} | {tags}"
elif tags:
remarks = tags
# 构建视频项
videos.append({
'vod_id': vid,
'vod_name': title,
'vod_pic': img_url,
'vod_remarks': remarks
})
except Exception as e:
self.log(f"处理视频项时出错: {str(e)}", "ERROR")
continue
# 保存到缓存
self.setCache(cache_key, videos, 3600) # 缓存1小时
except Exception as e:
self.log(f"获取首页视频内容发生错误: {str(e)}", "ERROR")
result['list'] = videos
return result
def categoryContent(self, tid, pg, filter, extend):
"""获取分类内容"""
result = {}
videos = []
# 处理页码
if pg is None:
pg = 1
else:
pg = int(pg)
# 构建分类URL - tid是完整的URL路径
if tid.startswith("/"):
# 替换页码URL格式可能像: /index.php/vod/show/class/男频/id/1.html
if pg > 1:
if "html" in tid:
category_url = tid.replace(".html", f"/page/{pg}.html")
else:
category_url = f"{tid}/page/{pg}.html"
else:
category_url = tid
full_url = urllib.parse.urljoin(self.siteUrl, category_url)
else:
# 如果tid不是URL路径可能是旧版分类ID尝试查找对应URL
category_url = ""
for name, url in self.cateManual.items():
if name == tid:
category_url = url
break
if not category_url:
self.log(f"未找到分类ID对应的URL: {tid}", "ERROR")
result['list'] = []
result['page'] = pg
result['pagecount'] = 1
result['limit'] = 0
result['total'] = 0
return result
# 处理页码
if pg > 1:
if "html" in category_url:
category_url = category_url.replace(".html", f"/page/{pg}.html")
else:
category_url = f"{category_url}/page/{pg}.html"
full_url = urllib.parse.urljoin(self.siteUrl, category_url)
# 请求分类页
try:
response = self.fetch(full_url)
if response and response.status_code == 200:
html = response.text
soup = BeautifulSoup(html, 'html.parser')
# 查找视频项根据实际HTML结构调整
items = soup.find_all('li', class_='item')
for item in items:
try:
# 获取链接和标题
title_tag = item.find('h3')
if not title_tag:
continue
title = title_tag.text.strip()
# 获取链接
link_tag = item.find('a')
if not link_tag:
continue
link = link_tag.get('href', '')
if not link.startswith('http'):
link = urllib.parse.urljoin(self.siteUrl, link)
# 提取ID
vid = self.extractVodId(link)
if not vid:
continue
# 获取图片
img_tag = item.find('img')
img_url = ""
if img_tag:
img_url = img_tag.get('src', img_tag.get('data-src', ''))
if img_url and not img_url.startswith('http'):
img_url = urllib.parse.urljoin(self.siteUrl, img_url)
# 获取备注信息
remarks = ""
remarks_tag = item.find('span', class_='remarks')
if remarks_tag:
remarks = remarks_tag.text.strip()
# 获取标签信息
tags = ""
tags_tag = item.find('span', class_='tags')
if tags_tag:
tags = tags_tag.text.strip()
# 合并备注和标签
if remarks and tags:
remarks = f"{remarks} | {tags}"
elif tags:
remarks = tags
# 构建视频项
videos.append({
'vod_id': vid,
'vod_name': title,
'vod_pic': img_url,
'vod_remarks': remarks
})
except Exception as e:
self.log(f"处理分类视频项时出错: {str(e)}", "ERROR")
continue
# 查找分页信息
# 默认值
total = len(videos)
pagecount = 1
limit = 20
# 尝试查找分页元素
pagination = soup.find('ul', class_='page')
if pagination:
# 查找最后一页的链接
last_page_links = pagination.find_all('a')
for link in last_page_links:
page_text = link.text.strip()
if page_text.isdigit():
pagecount = max(pagecount, int(page_text))
except Exception as e:
self.log(f"获取分类内容发生错误: {str(e)}", "ERROR")
result['list'] = videos
result['page'] = pg
result['pagecount'] = pagecount
result['limit'] = limit
result['total'] = total
return result
def detailContent(self, ids):
"""获取详情内容"""
result = {}
if not ids or len(ids) == 0:
return result
# 视频ID
vid = ids[0]
# 构建播放页URL
play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/1.html"
try:
response = self.fetch(play_url)
if not response or response.status_code != 200:
return result
html = response.text
soup = BeautifulSoup(html, 'html.parser')
# 提取视频基本信息
# 标题
title = ""
title_tag = soup.find('h1', class_='items-title')
if title_tag:
title = title_tag.text.strip()
# 图片
pic = ""
pic_tag = soup.find('img', class_='thumb')
if pic_tag:
pic = pic_tag.get('src', '')
if pic and not pic.startswith('http'):
pic = urllib.parse.urljoin(self.siteUrl, pic)
# 简介
desc = ""
desc_tag = soup.find('div', class_='text-content')
if desc_tag:
desc = desc_tag.text.strip()
# 标签/分类
tags = []
tags_container = soup.find('span', class_='items-tags')
if tags_container:
tag_links = tags_container.find_all('a')
for tag in tag_links:
tag_text = tag.text.strip()
if tag_text:
tags.append(tag_text)
# 提取播放列表
play_from = "偷乐短剧"
play_list = []
# 查找播放列表区域
play_area = soup.find('div', class_='swiper-wrapper')
if play_area:
# 查找所有剧集链接
episode_links = play_area.find_all('a')
for ep in episode_links:
ep_title = ep.text.strip()
ep_url = ep.get('href', '')
if ep_url:
# 直接使用URL作为ID
if not ep_url.startswith('http'):
ep_url = urllib.parse.urljoin(self.siteUrl, ep_url)
# 提取集数信息
ep_num = ep_title
if ep_num.isdigit():
ep_num = f"{ep_num}"
play_list.append(f"{ep_num}${ep_url}")
# 如果没有找到播放列表,查找播放按钮
if not play_list:
play_btn = soup.find('a', class_='btn-play')
if play_btn:
play_url = play_btn.get('href', '')
if play_url:
if not play_url.startswith('http'):
play_url = urllib.parse.urljoin(self.siteUrl, play_url)
play_list.append(f"播放${play_url}")
# 如果仍然没有找到播放链接使用播放页URL
if not play_list:
play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/1.html"
play_list.append(f"播放${play_url}")
# 提取更多信息(导演、演员等)
director = ""
actor = ""
year = ""
area = ""
remarks = ""
# 查找备注信息
meta_items = soup.find_all('div', class_='meta-item')
for item in meta_items:
item_title = item.find('span', class_='item-title')
item_content = item.find('span', class_='item-content')
if item_title and item_content:
title_text = item_title.text.strip()
content_text = item_content.text.strip()
if "导演" in title_text:
director = content_text
elif "主演" in title_text:
actor = content_text
elif "年份" in title_text:
year = content_text
elif "地区" in title_text:
area = content_text
elif "简介" in title_text:
if not desc:
desc = content_text
elif "状态" in title_text:
remarks = content_text
# 如果没有从meta-item中获取到remarks
if not remarks:
remarks_tag = soup.find('span', class_='remarks')
if remarks_tag:
remarks = remarks_tag.text.strip()
# 构建标准数据结构
vod = {
"vod_id": vid,
"vod_name": title,
"vod_pic": pic,
"vod_year": year,
"vod_area": area,
"vod_remarks": remarks,
"vod_actor": actor,
"vod_director": director,
"vod_content": desc,
"type_name": ",".join(tags),
"vod_play_from": play_from,
"vod_play_url": "#".join(play_list)
}
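# TVBox play-url convention: play_list holds "episode-name$url" pairs, which
# are joined with "#" above to form vod_play_url.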
result = {
'list': [vod]
}
except Exception as e:
self.log(f"获取详情内容时出错: {str(e)}", "ERROR")
return result
def searchContent(self, key, quick, pg=1):
"""搜索功能"""
result = {}
videos = []
# 构建搜索URL和参数
search_url = f"{self.siteUrl}/index.php/vod/search.html"
params = {"wd": key}
try:
response = self.fetch(search_url, data=params)
if response and response.status_code == 200:
html = response.text
soup = BeautifulSoup(html, 'html.parser')
# 查找搜索结果项
search_items = soup.find_all('li', class_='item')
for item in search_items:
try:
# 获取标题
title_tag = item.find('h3')
if not title_tag:
continue
title = title_tag.text.strip()
# 获取链接
link_tag = item.find('a')
if not link_tag:
continue
link = link_tag.get('href', '')
if not link.startswith('http'):
link = urllib.parse.urljoin(self.siteUrl, link)
# 提取视频ID
vid = self.extractVodId(link)
if not vid:
continue
# 获取图片
img_tag = item.find('img')
img_url = ""
if img_tag:
img_url = img_tag.get('src', img_tag.get('data-src', ''))
if img_url and not img_url.startswith('http'):
img_url = urllib.parse.urljoin(self.siteUrl, img_url)
# 获取备注信息
remarks = ""
remarks_tag = item.find('span', class_='remarks')
if remarks_tag:
remarks = remarks_tag.text.strip()
# 获取标签信息
tags = ""
tags_tag = item.find('span', class_='tags')
if tags_tag:
tags = tags_tag.text.strip()
# 合并备注和标签
if remarks and tags:
remarks = f"{remarks} | {tags}"
elif tags:
remarks = tags
# 构建视频项
videos.append({
'vod_id': vid,
'vod_name': title,
'vod_pic': img_url,
'vod_remarks': remarks
})
except Exception as e:
self.log(f"处理搜索结果时出错: {str(e)}", "ERROR")
continue
except Exception as e:
self.log(f"搜索功能发生错误: {str(e)}", "ERROR")
result['list'] = videos
return result
def searchContentPage(self, key, quick, pg=1):
return self.searchContent(key, quick, pg)
def playerContent(self, flag, id, vipFlags):
"""获取播放内容"""
result = {}
try:
# 判断是否已经是视频URL
if self.isVideoFormat(id):
result["parse"] = 0
result["url"] = id
result["playUrl"] = ""
result["header"] = json.dumps(self.headers)
return result
# 判断是否是完整的页面URL
if id.startswith(('http://', 'https://')):
play_url = id
# 尝试作为相对路径处理
elif id.startswith('/'):
play_url = urllib.parse.urljoin(self.siteUrl, id)
# 假设是视频ID构建播放页面URL
else:
# 检查是否是"视频ID_集数"格式
parts = id.split('_')
if len(parts) > 1 and parts[0].isdigit():
vid = parts[0]
nid = parts[1]
play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/{nid}.html"
else:
# 直接当作视频ID处理
play_url = f"{self.siteUrl}/index.php/vod/play/id/{id}/sid/1/nid/1.html"
# 访问播放页获取真实播放地址
try:
self.log(f"正在解析播放页面: {play_url}")
response = self.fetch(play_url)
if response and response.status_code == 200:
html = response.text
# 查找player_aaaa变量
player_match = re.search(r'var\s+player_aaaa\s*=\s*({.*?});', html, re.DOTALL)
if player_match:
try:
player_data = json.loads(player_match.group(1))
if 'url' in player_data:
video_url = player_data['url']
if not video_url.startswith('http'):
video_url = urllib.parse.urljoin(self.siteUrl, video_url)
self.log(f"从player_aaaa获取到视频地址: {video_url}")
result["parse"] = 0
result["url"] = video_url
result["playUrl"] = ""
result["header"] = json.dumps(self.headers)
return result
except json.JSONDecodeError as e:
self.log(f"解析player_aaaa JSON出错: {str(e)}", "ERROR")
# 如果player_aaaa解析失败尝试其他方式
# 1. 查找video标签
video_match = re.search(r'<video[^>]*src=["\'](.*?)["\']', html)
if video_match:
video_url = video_match.group(1)
if not video_url.startswith('http'):
video_url = urllib.parse.urljoin(self.siteUrl, video_url)
self.log(f"从video标签找到视频地址: {video_url}")
result["parse"] = 0
result["url"] = video_url
result["playUrl"] = ""
result["header"] = json.dumps(self.headers)
return result
# 2. 查找iframe
iframe_match = re.search(r'<iframe[^>]*src=["\'](.*?)["\']', html)
if iframe_match:
iframe_url = iframe_match.group(1)
if not iframe_url.startswith('http'):
iframe_url = urllib.parse.urljoin(self.siteUrl, iframe_url)
self.log(f"找到iframe正在解析: {iframe_url}")
# 访问iframe内容
iframe_response = self.fetch(iframe_url)
if iframe_response and iframe_response.status_code == 200:
iframe_html = iframe_response.text
# 在iframe内容中查找视频地址
iframe_video_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', iframe_html)
if iframe_video_match:
video_url = iframe_video_match.group(1)
self.log(f"从iframe中找到视频地址: {video_url}")
result["parse"] = 0
result["url"] = video_url
result["playUrl"] = ""
result["header"] = json.dumps({
"User-Agent": self.headers["User-Agent"],
"Referer": iframe_url
})
return result
# 3. 查找任何可能的视频URL
url_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', html)
if url_match:
video_url = url_match.group(1)
self.log(f"找到可能的视频地址: {video_url}")
result["parse"] = 0
result["url"] = video_url
result["playUrl"] = ""
result["header"] = json.dumps(self.headers)
return result
except Exception as e:
self.log(f"解析播放地址时出错: {str(e)}", "ERROR")
# 如果所有方式都失败,返回外部解析标志
self.log("未找到直接可用的视频地址,需要外部解析", "WARNING")
result["parse"] = 1 # 表示需要外部解析
result["url"] = play_url # 返回播放页面URL
result["playUrl"] = ""
result["header"] = json.dumps(self.headers)
except Exception as e:
self.log(f"获取播放内容时出错: {str(e)}", "ERROR")
return result
def localProxy(self, param):
"""本地代理"""
return [404, "text/plain", {}, "Not Found"]
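For reviewers who want to exercise the new spider locally, a minimal driver could look like this (hypothetical, not part of the committed file; it relies on the ImportError fallback stub above when base.spider is unavailable):

if __name__ == "__main__":
    spider = Spider()
    spider.init()
    home = spider.homeContent(False)
    print("home items:", len(home.get("list", [])))
    search = spider.searchContent("总裁", False)
    print("search items:", len(search.get("list", [])))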

潇洒/py/河马短剧.py Normal file (+581 lines)

@@ -0,0 +1,581 @@
# -*- coding: utf-8 -*-
import requests
import re
import json
import traceback
import sys
sys.path.append('../../')
try:
from base.spider import Spider
except ImportError:
# 定义一个基础接口类,用于本地测试
class Spider:
def init(self, extend=""):
pass
class Spider(Spider):
def __init__(self):
self.siteUrl = "https://www.kuaikaw.cn"
self.nextData = None # 缓存NEXT_DATA数据
self.cateManual = {
"甜宠": "462",
"古装仙侠": "1102",
"现代言情": "1145",
"青春": "1170",
"豪门恩怨": "585",
"逆袭": "417-464",
"重生": "439-465",
"系统": "1159",
"总裁": "1147",
"职场商战": "943"
}
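# Note: these type_id values are used verbatim as browse paths, e.g. the
# "逆袭" id "417-464" becomes GET {siteUrl}/browse/417-464/{page} in
# categoryContent() below.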
def getName(self):
# 返回爬虫名称
return "河马短剧"
def init(self, extend=""):
return
def fetch(self, url, headers=None):
"""统一的网络请求接口"""
if headers is None:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
"Referer": self.siteUrl,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
}
try:
response = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
response.raise_for_status()
return response
except Exception as e:
print(f"请求异常: {url}, 错误: {str(e)}")
return None
def isVideoFormat(self, url):
# 检查是否为视频格式
video_formats = ['.mp4', '.mkv', '.avi', '.wmv', '.m3u8', '.flv', '.rmvb']
for format in video_formats:
if format in url.lower():
return True
return False
def manualVideoCheck(self):
# 不需要手动检查
return False
def homeContent(self, filter):
"""获取首页分类及筛选"""
result = {}
# 分类列表使用已初始化的cateManual
classes = []
for k in self.cateManual:
classes.append({
'type_name': k,
'type_id': self.cateManual[k]
})
result['class'] = classes
# 获取首页推荐视频
try:
result['list'] = self.homeVideoContent()['list']
except Exception:  # don't let a failed recommendation fetch break the home page
result['list'] = []
return result
def homeVideoContent(self):
"""获取首页推荐视频内容"""
videos = []
try:
response = self.fetch(self.siteUrl)
html_content = response.text
# 提取NEXT_DATA JSON数据
next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
if next_data_match:
next_data_json = json.loads(next_data_match.group(1))
page_props = next_data_json.get("props", {}).get("pageProps", {})
# 获取轮播图数据 - 这些通常是推荐内容
if "bannerList" in page_props and isinstance(page_props["bannerList"], list):
banner_list = page_props["bannerList"]
for banner in banner_list:
book_id = banner.get("bookId", "")
book_name = banner.get("bookName", "")
cover_url = banner.get("coverWap", banner.get("wapUrl", ""))
# 获取状态和章节数
status = banner.get("statusDesc", "")
total_chapters = banner.get("totalChapterNum", "")
if book_id and book_name:
videos.append({
"vod_id": f"/drama/{book_id}",
"vod_name": book_name,
"vod_pic": cover_url,
"vod_remarks": f"{status} {total_chapters}" if total_chapters else status
})
# SEO分类下的推荐
if "seoColumnVos" in page_props and isinstance(page_props["seoColumnVos"], list):
for column in page_props["seoColumnVos"]:
book_infos = column.get("bookInfos", [])
for book in book_infos:
book_id = book.get("bookId", "")
book_name = book.get("bookName", "")
cover_url = book.get("coverWap", "")
status = book.get("statusDesc", "")
total_chapters = book.get("totalChapterNum", "")
if book_id and book_name:
videos.append({
"vod_id": f"/drama/{book_id}",
"vod_name": book_name,
"vod_pic": cover_url,
"vod_remarks": f"{status} {total_chapters}" if total_chapters else status
})
# # 去重
# seen = set()
# unique_videos = []
# for video in videos:
# if video["vod_id"] not in seen:
# seen.add(video["vod_id"])
# unique_videos.append(video)
# videos = unique_videos
except Exception as e:
print(f"获取首页推荐内容出错: {e}")
result = {
"list": videos
}
return result
def categoryContent(self, tid, pg, filter, extend):
"""获取分类内容"""
result = {}
videos = []
url = f"{self.siteUrl}/browse/{tid}/{pg}"
response = self.fetch(url)
if response is None:
    # Network failure: return an empty page instead of crashing on response.text
    return {"list": [], "page": int(pg), "pagecount": 1, "limit": 0, "total": 0}
html_content = response.text
# 提取NEXT_DATA JSON数据
next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
if next_data_match:
next_data_json = json.loads(next_data_match.group(1))
page_props = next_data_json.get("props", {}).get("pageProps", {})
# 获取总页数和当前页
current_page = page_props.get("page", 1)
total_pages = page_props.get("pages", 1)
# 获取书籍列表
book_list = page_props.get("bookList", [])
# 转换为通用格式
for book in book_list:
book_id = book.get("bookId", "")
book_name = book.get("bookName", "")
cover_url = book.get("coverWap", "")
status_desc = book.get("statusDesc", "")
total_chapters = book.get("totalChapterNum", "")
if book_id and book_name:
videos.append({
"vod_id": f"/drama/{book_id}",
"vod_name": book_name,
"vod_pic": cover_url,
"vod_remarks": f"{status_desc} {total_chapters}" if total_chapters else status_desc
})
# 构建返回结果
result = {
"list": videos,
"page": int(current_page),
"pagecount": total_pages,
"limit": len(videos),
"total": total_pages * len(videos) if videos else 0
}
return result
def switch(self, key, pg):
# 搜索功能
search_results = []
# 获取第一页结果,并检查总页数
url = f"{self.siteUrl}/search?searchValue={key}&page={pg}"
response = self.fetch(url)
html_content = response.text
# 提取NEXT_DATA JSON数据
next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
if next_data_match:
next_data_json = json.loads(next_data_match.group(1))
page_props = next_data_json.get("props", {}).get("pageProps", {})
# 获取总页数
total_pages = page_props.get("pages", 1)
# 处理所有页的数据
all_book_list = []
# 添加第一页的书籍列表
book_list = page_props.get("bookList", [])
all_book_list.extend(book_list)
# 如果有多页,获取其他页的数据
if total_pages > 1:  # fetch the remaining result pages too (quick mode is not treated specially here)
for page in range(2, total_pages + 1):
next_page_url = f"{self.siteUrl}/search?searchValue={key}&page={page}"
next_page_response = self.fetch(next_page_url)
next_page_html = next_page_response.text
next_page_match = re.search(next_data_pattern, next_page_html, re.DOTALL)
if next_page_match:
next_page_json = json.loads(next_page_match.group(1))
next_page_props = next_page_json.get("props", {}).get("pageProps", {})
next_page_books = next_page_props.get("bookList", [])
all_book_list.extend(next_page_books)
# 转换为统一的搜索结果格式
for book in all_book_list:
book_id = book.get("bookId", "")
book_name = book.get("bookName", "")
cover_url = book.get("coverWap", "")
total_chapters = book.get("totalChapterNum", "0")
status_desc = book.get("statusDesc", "")
# 构建视频项
vod = {
"vod_id": f"/drama/{book_id}",
"vod_name": book_name,
"vod_pic": cover_url,
"vod_remarks": f"{status_desc} {total_chapters}"
}
search_results.append(vod)
result = {
"list": search_results,
"page": pg
}
return result
def searchContent(self, key, quick, pg=1):
result = self.switch(key, pg=pg)
result['page'] = pg
return result
def searchContentPage(self, key, quick, pg=1):
return self.searchContent(key, quick, pg)
def detailContent(self, ids):
# 获取剧集信息
vod_id = ids[0]
episode_id = None
chapter_id = None
if not vod_id.startswith('/drama/'):
if vod_id.startswith('/episode/'):
episode_info = vod_id.replace('/episode/', '').split('/')
if len(episode_info) >= 2:
episode_id = episode_info[0]
chapter_id = episode_info[1]
vod_id = f'/drama/{episode_id}'
else:
vod_id = '/drama/' + vod_id
drama_url = self.siteUrl + vod_id
print(f"请求URL: {drama_url}")
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
"Referer": self.siteUrl,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
}
rsp = self.fetch(drama_url, headers=headers)
if not rsp or rsp.status_code != 200:
print(f"请求失败,状态码: {getattr(rsp, 'status_code', 'N/A')}")
return {}
html = rsp.text
next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)
if not next_data_match:
print("未找到NEXT_DATA内容")
return {}
try:
next_data = json.loads(next_data_match.group(1))
page_props = next_data.get("props", {}).get("pageProps", {})
print(f"找到页面属性,包含 {len(page_props.keys())} 个键")
book_info = page_props.get("bookInfoVo", {})
chapter_list = page_props.get("chapterList", [])
title = book_info.get("title", "")
sub_title = f"{book_info.get('totalChapterNum', '')}"
categories = []
for category in book_info.get("categoryList", []):
categories.append(category.get("name", ""))
vod_content = book_info.get("introduction", "")
vod = {
"vod_id": vod_id,
"vod_name": title,
"vod_pic": book_info.get("coverWap", ""),
"type_name": ",".join(categories),
"vod_year": "",
"vod_area": book_info.get("countryName", ""),
"vod_remarks": sub_title,
"vod_actor": ", ".join([p.get("name", "") for p in book_info.get("performerList", [])]),
"vod_director": "",
"vod_content": vod_content
}
# 处理播放列表
play_url_list = []
episodes = []
if chapter_list:
print(f"找到 {len(chapter_list)} 个章节")
# 先检查是否有可以直接使用的MP4链接作为模板
mp4_template = None
first_mp4_chapter_id = None
# 先搜索第一个章节的MP4链接
# 为提高成功率,尝试直接请求第一个章节的播放页
if chapter_list and len(chapter_list) > 0:
first_chapter = chapter_list[0]
first_chapter_id = first_chapter.get("chapterId", "")
drama_id_clean = vod_id.replace('/drama/', '')
if first_chapter_id and drama_id_clean:
first_episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{first_chapter_id}"
print(f"请求第一集播放页: {first_episode_url}")
first_rsp = self.fetch(first_episode_url, headers=headers)
if first_rsp and first_rsp.status_code == 200:
first_html = first_rsp.text
# 直接从HTML提取MP4链接
mp4_pattern = r'(https?://[^"\']+\.mp4)'
mp4_matches = re.findall(mp4_pattern, first_html)
if mp4_matches:
mp4_template = mp4_matches[0]
first_mp4_chapter_id = first_chapter_id
print(f"找到MP4链接模板: {mp4_template}")
print(f"模板对应的章节ID: {first_mp4_chapter_id}")
# 如果未找到模板再检查章节对象中是否有MP4链接
if not mp4_template:
for chapter in chapter_list[:5]: # 只检查前5个章节以提高效率
if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
chapter_video = chapter["chapterVideoVo"]
mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
if mp4_url and ".mp4" in mp4_url:
mp4_template = mp4_url
first_mp4_chapter_id = chapter.get("chapterId", "")
print(f"从chapterVideoVo找到MP4链接模板: {mp4_template}")
print(f"模板对应的章节ID: {first_mp4_chapter_id}")
break
# 遍历所有章节处理播放信息
for chapter in chapter_list:
chapter_id = chapter.get("chapterId", "")
chapter_name = chapter.get("chapterName", "")
# 1. 如果章节自身有MP4链接直接使用
if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
chapter_video = chapter["chapterVideoVo"]
mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
if mp4_url and ".mp4" in mp4_url:
episodes.append(f"{chapter_name}${mp4_url}")
continue
# 2. 如果有MP4模板尝试替换章节ID构建MP4链接
if mp4_template and first_mp4_chapter_id and chapter_id:
# 替换模板中的章节ID部分
if first_mp4_chapter_id in mp4_template:
new_mp4_url = mp4_template.replace(first_mp4_chapter_id, chapter_id)
episodes.append(f"{chapter_name}${new_mp4_url}")
continue
# 3. 如果上述方法都不可行回退到使用chapter_id构建中间URL
if chapter_id and chapter_name:
url = f"{vod_id}${chapter_id}${chapter_name}"
episodes.append(f"{chapter_name}${url}")
if not episodes and vod_id:
# 尝试构造默认的集数
total_chapters = int(book_info.get("totalChapterNum", "0"))
if total_chapters > 0:
print(f"尝试构造 {total_chapters} 个默认集数")
# 如果知道章节ID的模式可以构造
if chapter_id and episode_id:
for i in range(1, total_chapters + 1):
chapter_name = f"{i}"
url = f"{vod_id}${chapter_id}${chapter_name}"
episodes.append(f"{chapter_name}${url}")
else:
# 使用普通的构造方式
for i in range(1, total_chapters + 1):
chapter_name = f"{i}"
url = f"{vod_id}${chapter_name}"
episodes.append(f"{chapter_name}${url}")
if episodes:
play_url_list.append("#".join(episodes))
vod['vod_play_from'] = '河马剧场'
vod['vod_play_url'] = '$$$'.join(play_url_list)
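# TVBox play-url convention: each episode is "name$url", episodes are joined
# with "#", and alternative play sources would be joined with "$$$".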
result = {
'list': [vod]
}
return result
except Exception as e:
print(f"解析详情页失败: {str(e)}")
print(traceback.format_exc())
return {}
def playerContent(self, flag, id, vipFlags):
result = {}
print(f"调用playerContent: flag={flag}, id={id}")
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
"Referer": self.siteUrl,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
}
# 解析id参数
parts = id.split('$')
drama_id = None
chapter_id = None
if len(parts) >= 2:
drama_id = parts[0]
chapter_id = parts[1]
chapter_name = parts[2] if len(parts) > 2 else "第一集"
print(f"解析参数: drama_id={drama_id}, chapter_id={chapter_id}")
else:
# 处理旧数据格式
print(f"使用原始URL格式: {id}")
result["parse"] = 0
result["url"] = id
result["header"] = json.dumps(headers)
return result
# 直接检查chapter_id是否包含http可能已经是视频链接
if 'http' in chapter_id and '.mp4' in chapter_id:
print(f"已经是MP4链接: {chapter_id}")
result["parse"] = 0
result["url"] = chapter_id
result["header"] = json.dumps(headers)
return result
# 构建episode页面URL
drama_id_clean = drama_id.replace('/drama/', '')
episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{chapter_id}"
print(f"请求episode页面: {episode_url}")
try:
rsp = self.fetch(episode_url, headers=headers)
if not rsp or rsp.status_code != 200:
print(f"请求失败,状态码: {getattr(rsp, 'status_code', 'N/A')}")
result["parse"] = 0
result["url"] = id
result["header"] = json.dumps(headers)
return result
html = rsp.text
print(f"获取页面大小: {len(html)} 字节")
# 尝试从NEXT_DATA提取视频链接
mp4_url = None
# 方法1: 从NEXT_DATA提取
next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)
if next_data_match:
try:
print("找到NEXT_DATA")
next_data = json.loads(next_data_match.group(1))
page_props = next_data.get("props", {}).get("pageProps", {})
# 从chapterList中查找当前章节
chapter_list = page_props.get("chapterList", [])
print(f"找到章节列表,长度: {len(chapter_list)}")
for chapter in chapter_list:
if chapter.get("chapterId") == chapter_id:
print(f"找到匹配的章节: {chapter.get('chapterName')}")
chapter_video = chapter.get("chapterVideoVo", {})
mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
if mp4_url:
print(f"从chapterList找到MP4链接: {mp4_url}")
break
# 如果未找到,尝试从当前章节获取
if not mp4_url:
current_chapter = page_props.get("chapterInfo", {})
if current_chapter:
print("找到当前章节信息")
chapter_video = current_chapter.get("chapterVideoVo", {})
mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
if mp4_url:
print(f"从chapterInfo找到MP4链接: {mp4_url}")
except Exception as e:
print(f"解析NEXT_DATA失败: {str(e)}")
print(traceback.format_exc())
# 方法2: 直接从HTML中提取MP4链接
if not mp4_url:
mp4_pattern = r'(https?://[^"\']+\.mp4)'
mp4_matches = re.findall(mp4_pattern, html)
if mp4_matches:
# 查找含有chapter_id的链接
matched_mp4 = False
for url in mp4_matches:
if chapter_id in url:
mp4_url = url
matched_mp4 = True
print(f"从HTML直接提取章节MP4链接: {mp4_url}")
break
# 如果没找到包含chapter_id的链接使用第一个
if not matched_mp4 and mp4_matches:
mp4_url = mp4_matches[0]
print(f"从HTML直接提取MP4链接: {mp4_url}")
if mp4_url and ".mp4" in mp4_url:
print(f"最终找到的MP4链接: {mp4_url}")
result["parse"] = 0
result["url"] = mp4_url
result["header"] = json.dumps(headers)
return result
else:
print(f"未找到有效的MP4链接尝试再次解析页面内容")
# 再尝试一次从HTML中广泛搜索所有可能的MP4链接
all_mp4_pattern = r'(https?://[^"\']+\.mp4)'
all_mp4_matches = re.findall(all_mp4_pattern, html)
if all_mp4_matches:
mp4_url = all_mp4_matches[0]
print(f"从HTML广泛搜索找到MP4链接: {mp4_url}")
result["parse"] = 0
result["url"] = mp4_url
result["header"] = json.dumps(headers)
return result
print(f"未找到视频链接返回原episode URL: {episode_url}")
result["parse"] = 0
result["url"] = episode_url
result["header"] = json.dumps(headers)
return result
except Exception as e:
print(f"请求或解析失败: {str(e)}")
print(traceback.format_exc())
result["parse"] = 0
result["url"] = id
result["header"] = json.dumps(headers)
return result
def localProxy(self, param):
# 本地代理处理,此处简单返回传入的参数
return [200, "video/MP2T", {}, param]
def destroy(self):
# 资源回收
pass
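As with 偷乐短剧.py, a quick local smoke test for this spider (hypothetical driver, not part of the committed file):

if __name__ == "__main__":
    spider = Spider()
    spider.init()
    cat = spider.categoryContent("462", 1, False, {})  # "462" is the 甜宠 category
    print("page", cat.get("page"), "of", cat.get("pagecount"), "-", len(cat.get("list", [])), "items")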

Binary file not shown.