1.0
This commit is contained in:
parent
8bec9d1b8a
commit
1250a6070a
60
README.md
60
README.md
|
# 反向代理服务器

这是一个使用 Python 和 Flask 构建的简单反向代理服务器。它可以将所有请求代理到在 `.env` 文件中配置的目标 URL,并动态重写响应内容(HTML, CSS, JS)中的链接,以确保所有资源都通过代理加载。

## 主要功能

- **反向代理**: 将所有传入的请求(包括 GET, POST 等方法和路径)转发到目标服务器。
- **URL 重写**: 自动重写 HTML (`href`, `src`, `action`)、CSS (`url()`, `@import`) 和 JavaScript 中的 URL,使其指向代理服务器而不是原始目标。
- **流式响应**: 对于非文本内容(如图片、视频),服务器以流式方式传输,以提高性能和减少内存使用。
- **CORS 支持**: 自动添加 CORS (跨域资源共享) 头,允许跨域请求。
- **配置简单**: 只需在 `.env` 文件中设置目标 URL 即可。

## 技术栈

- **Python**
- **Flask**: Web 框架
- **Requests**: 用于发送 HTTP 请求
- **BeautifulSoup4** & **lxml**: 用于解析和修改 HTML 内容
- **Waitress**: 用于生产环境的 WSGI 服务器
- **python-dotenv**: 用于管理环境变量

## 安装与配置

1. **克隆仓库**

   ```bash
   git clone <your-repo-url>
   cd <your-repo-directory>
   ```

2. **安装依赖**

   建议在虚拟环境中使用 `pip` 安装项目所需的依赖项:

   ```bash
   pip install -r requirements.txt
   ```

3. **创建配置文件**

   在项目根目录下创建一个名为 `.env` 的文件,并添加以下内容:

   ```
   TARGET_URL=https://example.com
   ```

   将 `https://example.com` 替换为您想要代理的目标网站 URL。

## 如何运行

完成安装和配置后,运行以下命令启动服务器:

```bash
python main.py
```

服务器将在 `http://0.0.0.0:8080` 上启动。现在,您可以通过访问 `http://localhost:8080` 来浏览代理的网站。

## 工作原理

该服务器拦截所有传入的 HTTP 请求。它会构建一个指向 `TARGET_URL` 的新请求,并将原始请求的路径、查询参数、请求头和数据包体一并转发。

当从目标服务器收到响应后:

- 如果响应是文本类型(如 `text/html`, `text/css`, `application/javascript`),服务器会先读取全部内容,使用 `BeautifulSoup` 和正则表达式查找并替换所有指向原始域名的 URL,然后将修改后的内容返回给客户端。
- 如果响应是二进制内容(如图片),服务器会直接将内容以数据流的形式转发给客户端,以避免不必要的内存消耗。
- 它还会处理 HTTP 重定向,并重写 `Location` 头,以确保用户停留在代理服务器上。
|
|
@ -0,0 +1,136 @@
|
|||
import os
|
||||
import requests
|
||||
import logging
|
||||
import re
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from bs4 import BeautifulSoup
|
||||
from flask import Flask, Response, request, stream_with_context
|
||||
from dotenv import load_dotenv
|
||||
from waitress import serve
|
||||
|
||||
# Pull configuration from a local .env file into the process environment.
load_dotenv()

app = Flask(__name__)
logging.basicConfig(level=logging.INFO)

# The upstream site every request is proxied to; required at startup.
TARGET_URL = os.getenv('TARGET_URL')
if not TARGET_URL:
    raise ValueError("TARGET_URL is not set in the .env file")

# Parse the target once so request handling can compare hosts cheaply.
TARGET_PARSED = urlparse(TARGET_URL)
TARGET_HOST = TARGET_PARSED.netloc
|
||||
|
||||
def rewrite_text(content, content_type, proxy_base_url):
    """Rewrite target-site URLs in text content so they point at the proxy.

    Args:
        content: Raw response body (bytes; HTML may also be str).
        content_type: Value of the upstream ``Content-Type`` header.
        proxy_base_url: Base URL of this proxy, e.g. ``http://localhost:8080/``.

    Returns:
        The rewritten body — HTML as ``str``, CSS/JS as ``bytes``; any other
        content type (and empty input) is returned unchanged.
    """
    if not content:
        return content

    def to_proxy(url):
        # Rebase *url* onto the proxy when it resolves to the target host;
        # return None for foreign-host URLs so callers leave them untouched.
        absolute = urljoin(TARGET_URL, url)
        if urlparse(absolute).netloc == TARGET_HOST:
            return absolute.replace(TARGET_URL, proxy_base_url, 1)
        return None

    if 'text/html' in content_type:
        soup = BeautifulSoup(content, 'lxml')

        # Rewrite link-carrying attributes on common tags.
        for tag, attr in [('a', 'href'), ('link', 'href'), ('script', 'src'),
                          ('img', 'src'), ('form', 'action')]:
            for t in soup.find_all(tag, **{attr: True}):
                proxied = to_proxy(t[attr])
                if proxied is not None:
                    t[attr] = proxied

        # Rewrite url(...) references in inline style attributes. The quote
        # (if any) is captured separately so it is not treated as part of
        # the URL, and foreign-host URLs are left alone.
        def style_sub(m):
            quote, url = m.group(1), m.group(2)
            proxied = to_proxy(url) if url else None
            return f"url({quote}{proxied}{quote})" if proxied else m.group(0)

        for t in soup.find_all(style=True):
            t['style'] = re.sub(r'url\((["\']?)(.*?)\1\)', style_sub, t['style'])

        return str(soup)

    elif 'css' in content_type or 'javascript' in content_type:
        content_str = content.decode('utf-8', errors='ignore')

        def replace_url(match):
            # Group 2 holds the url(...) target, group 4 the @import target.
            # (Groups 1 and 3 are the optional/required quote characters —
            # using them as the URL would rewrite the quote, not the link.)
            url = match.group(2) if match.group(2) is not None else match.group(4)
            if not url:
                # Empty url(): str.replace('') would corrupt the match.
                return match.group(0)
            proxied = to_proxy(url)
            if proxied is None:
                return match.group(0)  # points at another host — leave as-is
            return match.group(0).replace(url, proxied)

        rewritten = re.sub(r'url\((["\']?)(.*?)\1\)|@import\s+(["\'])(.*?)\3',
                           replace_url, content_str)
        return rewritten.encode('utf-8')

    return content
|
||||
|
||||
@app.after_request
def add_cors_headers(response):
    """Attach permissive CORS headers to every outgoing response."""
    cors_headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET,POST,PUT,DELETE,PATCH,OPTIONS',
        'Access-Control-Allow-Headers': 'Content-Type,Authorization,Origin,Accept,User-Agent,Cookie',
        'Access-Control-Allow-Credentials': 'true',
    }
    for name, value in cors_headers.items():
        response.headers[name] = value
    return response
|
||||
|
||||
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
def proxy(path):
    """Forward the incoming request to TARGET_URL and relay the response.

    Text responses (HTML/CSS/JS) are buffered and their URLs rewritten to
    point back at this proxy; any other content is streamed through as-is.

    Args:
        path: Request path captured by the route (no leading slash).

    Returns:
        A Flask Response mirroring the upstream response, or a
        ``("Proxy connection error", 502)`` tuple when the upstream
        connection fails.
    """
    # Build the upstream URL, preserving the query string verbatim.
    target_url = urljoin(TARGET_URL, path)
    if request.query_string:
        target_url += '?' + request.query_string.decode('utf-8')

    # Forward the client's headers, but impersonate a direct visit to the
    # target (Host/Referer/Origin) since many sites reject mismatches.
    headers = {key: value for key, value in request.headers if key.lower() != 'host'}
    headers['Host'] = TARGET_HOST
    headers['Referer'] = TARGET_URL
    headers['Origin'] = TARGET_URL.rstrip('/')

    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            stream=True,
            allow_redirects=False,  # redirects are rewritten below instead
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Proxy error connecting to {target_url}: {e}")
        return "Proxy connection error", 502

    # Drop hop-by-hop / encoding headers that no longer describe what we send.
    excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
    response_headers = [(name, value) for name, value in resp.headers.items()
                        if name.lower() not in excluded_headers]

    # Handle all redirect statuses (including 303 See Other) manually,
    # keeping the remaining upstream headers — notably Set-Cookie — so
    # login flows that redirect after setting a session still work.
    if resp.status_code in (301, 302, 303, 307, 308):
        location = resp.headers.get('Location')
        if location:
            redirect_headers = [(name, value) for name, value in response_headers
                                if name.lower() != 'location']
            response = Response(status=resp.status_code, headers=redirect_headers)
            # Point the client back at the proxy instead of the target.
            response.headers['Location'] = location.replace(TARGET_URL, request.host_url, 1)
            return response

    content_type = resp.headers.get('Content-Type', '')
    proxy_base_url = request.host_url

    def generate():
        # Yield the (possibly rewritten) upstream body.
        if any(t in content_type for t in ['html', 'css', 'javascript']):
            # Text content must be fully buffered so URLs can be rewritten.
            content = resp.content
            rewritten_content = rewrite_text(content, content_type, proxy_base_url)
            if rewritten_content:
                yield (rewritten_content if isinstance(rewritten_content, bytes)
                       else rewritten_content.encode('utf-8'))
        else:
            # Binary content (images, media, ...) streams straight through.
            for chunk in resp.iter_content(chunk_size=8192):
                yield chunk

    return Response(stream_with_context(generate()), status=resp.status_code,
                    headers=response_headers)
|
||||
|
||||
if __name__ == '__main__':
    # Serve with waitress (a production WSGI server) instead of Flask's
    # built-in development server.
    bind_host, bind_port = '0.0.0.0', 8080
    print("Starting production server on http://0.0.0.0:8080")
    serve(app, host=bind_host, port=bind_port, threads=16)
|
|
@ -0,0 +1,6 @@
|
|||
flask
requests
python-dotenv
waitress
beautifulsoup4
lxml
|
Loading…
Reference in New Issue