1.0
This commit is contained in:
parent
8bec9d1b8a
commit
1250a6070a
60
README.md
60
README.md
|
# 反向代理服务器

这是一个使用 Python 和 Flask 构建的简单反向代理服务器。它可以将所有请求代理到在 `.env` 文件中配置的目标 URL,并动态重写响应内容(HTML, CSS, JS)中的链接,以确保所有资源都通过代理加载。

## 主要功能

- **反向代理**: 将所有传入的请求(包括 GET, POST 等方法和路径)转发到目标服务器。
- **URL 重写**: 自动重写 HTML (`href`, `src`, `action`)、CSS (`url()`, `@import`) 和 JavaScript 中的 URL,使其指向代理服务器而不是原始目标。
- **流式响应**: 对于非文本内容(如图片、视频),服务器以流式方式传输,以提高性能和减少内存使用。
- **CORS 支持**: 自动添加 CORS (跨域资源共享) 头,允许跨域请求。
- **配置简单**: 只需在 `.env` 文件中设置目标 URL 即可。

## 技术栈

- **Python**
- **Flask**: Web 框架
- **Requests**: 用于发送 HTTP 请求
- **BeautifulSoup4** & **lxml**: 用于解析和修改 HTML 内容
- **Waitress**: 用于生产环境的 WSGI 服务器
- **python-dotenv**: 用于管理环境变量

## 安装与配置

1. **克隆仓库**

   ```bash
   git clone <your-repo-url>
   cd <your-repo-directory>
   ```

2. **安装依赖**

   建议在虚拟环境中使用 `pip` 安装项目所需的依赖项:

   ```bash
   pip install -r requirements.txt
   ```

3. **创建配置文件**

   在项目根目录下创建一个名为 `.env` 的文件,并添加以下内容:

   ```
   TARGET_URL=https://example.com
   ```

   将 `https://example.com` 替换为您想要代理的目标网站 URL。

## 如何运行

完成安装和配置后,运行以下命令启动服务器:

```bash
python main.py
```

服务器将在 `http://0.0.0.0:8080` 上启动。现在,您可以通过访问 `http://localhost:8080` 来浏览代理的网站。

## 工作原理

该服务器拦截所有传入的 HTTP 请求。它会构建一个指向 `TARGET_URL` 的新请求,并将原始请求的路径、查询参数、请求头和数据包体一并转发。

当从目标服务器收到响应后:

- 如果响应是文本类型(如 `text/html`, `text/css`, `application/javascript`),服务器会先读取全部内容,使用 `BeautifulSoup` 和正则表达式查找并替换所有指向原始域名的 URL,然后将修改后的内容返回给客户端。
- 如果响应是二进制内容(如图片),服务器会直接将内容以数据流的形式转发给客户端,以避免不必要的内存消耗。
- 它还会处理 HTTP 重定向,并重写 `Location` 头,以确保用户停留在代理服务器上。
|
|
@ -0,0 +1,136 @@
|
|||
import os
|
||||
import requests
|
||||
import logging
|
||||
import re
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from bs4 import BeautifulSoup
|
||||
from flask import Flask, Response, request, stream_with_context
|
||||
from dotenv import load_dotenv
|
||||
from waitress import serve
|
||||
|
||||
# Pull configuration from a local .env file into the process environment.
load_dotenv()

app = Flask(__name__)
logging.basicConfig(level=logging.INFO)

# The upstream site every request is proxied to; required at startup.
TARGET_URL = os.getenv('TARGET_URL')
if not TARGET_URL:
    raise ValueError("TARGET_URL is not set in the .env file")

# Parse the target once so request handling can compare hosts cheaply.
TARGET_PARSED = urlparse(TARGET_URL)
TARGET_HOST = TARGET_PARSED.netloc
|
||||
|
||||
def rewrite_text(content, content_type, proxy_base_url):
    """Rewrite target-site URLs in text content so they point at the proxy.

    Args:
        content: Raw response body (bytes; HTML may also be str).
        content_type: Value of the upstream ``Content-Type`` header.
        proxy_base_url: Base URL of this proxy, e.g. ``http://localhost:8080/``.

    Returns:
        The rewritten body — HTML as ``str``, CSS/JS as ``bytes``; any other
        content type (and empty input) is returned unchanged.
    """
    if not content:
        return content

    def to_proxy(url):
        # Rebase *url* onto the proxy when it resolves to the target host;
        # return None for foreign-host URLs so callers leave them untouched.
        absolute = urljoin(TARGET_URL, url)
        if urlparse(absolute).netloc == TARGET_HOST:
            return absolute.replace(TARGET_URL, proxy_base_url, 1)
        return None

    if 'text/html' in content_type:
        soup = BeautifulSoup(content, 'lxml')

        # Rewrite link-carrying attributes on common tags.
        for tag, attr in [('a', 'href'), ('link', 'href'), ('script', 'src'),
                          ('img', 'src'), ('form', 'action')]:
            for t in soup.find_all(tag, **{attr: True}):
                proxied = to_proxy(t[attr])
                if proxied is not None:
                    t[attr] = proxied

        # Rewrite url(...) references in inline style attributes. The quote
        # (if any) is captured separately so it is not treated as part of
        # the URL, and foreign-host URLs are left alone.
        def style_sub(m):
            quote, url = m.group(1), m.group(2)
            proxied = to_proxy(url) if url else None
            return f"url({quote}{proxied}{quote})" if proxied else m.group(0)

        for t in soup.find_all(style=True):
            t['style'] = re.sub(r'url\((["\']?)(.*?)\1\)', style_sub, t['style'])

        return str(soup)

    elif 'css' in content_type or 'javascript' in content_type:
        content_str = content.decode('utf-8', errors='ignore')

        def replace_url(match):
            # Group 2 holds the url(...) target, group 4 the @import target.
            # (Groups 1 and 3 are the optional/required quote characters —
            # using them as the URL would rewrite the quote, not the link.)
            url = match.group(2) if match.group(2) is not None else match.group(4)
            if not url:
                # Empty url(): str.replace('') would corrupt the match.
                return match.group(0)
            proxied = to_proxy(url)
            if proxied is None:
                return match.group(0)  # points at another host — leave as-is
            return match.group(0).replace(url, proxied)

        rewritten = re.sub(r'url\((["\']?)(.*?)\1\)|@import\s+(["\'])(.*?)\3',
                           replace_url, content_str)
        return rewritten.encode('utf-8')

    return content
|
||||
|
||||
@app.after_request
def add_cors_headers(response):
    """Attach permissive CORS headers to every outgoing response."""
    cors_headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET,POST,PUT,DELETE,PATCH,OPTIONS',
        'Access-Control-Allow-Headers': 'Content-Type,Authorization,Origin,Accept,User-Agent,Cookie',
        'Access-Control-Allow-Credentials': 'true',
    }
    for name, value in cors_headers.items():
        response.headers[name] = value
    return response
|
||||
|
||||
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
def proxy(path):
    """Forward the incoming request to TARGET_URL and relay the response.

    Text responses (HTML/CSS/JS) are buffered and their URLs rewritten to
    point back at this proxy; any other content is streamed through as-is.

    Args:
        path: Request path captured by the route (no leading slash).

    Returns:
        A Flask Response mirroring the upstream response, or a
        ``("Proxy connection error", 502)`` tuple when the upstream
        connection fails.
    """
    # Build the upstream URL, preserving the query string verbatim.
    target_url = urljoin(TARGET_URL, path)
    if request.query_string:
        target_url += '?' + request.query_string.decode('utf-8')

    # Forward the client's headers, but impersonate a direct visit to the
    # target (Host/Referer/Origin) since many sites reject mismatches.
    headers = {key: value for key, value in request.headers if key.lower() != 'host'}
    headers['Host'] = TARGET_HOST
    headers['Referer'] = TARGET_URL
    headers['Origin'] = TARGET_URL.rstrip('/')

    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            stream=True,
            allow_redirects=False,  # redirects are rewritten below instead
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Proxy error connecting to {target_url}: {e}")
        return "Proxy connection error", 502

    # Drop hop-by-hop / encoding headers that no longer describe what we send.
    excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
    response_headers = [(name, value) for name, value in resp.headers.items()
                        if name.lower() not in excluded_headers]

    # Handle all redirect statuses (including 303 See Other) manually,
    # keeping the remaining upstream headers — notably Set-Cookie — so
    # login flows that redirect after setting a session still work.
    if resp.status_code in (301, 302, 303, 307, 308):
        location = resp.headers.get('Location')
        if location:
            redirect_headers = [(name, value) for name, value in response_headers
                                if name.lower() != 'location']
            response = Response(status=resp.status_code, headers=redirect_headers)
            # Point the client back at the proxy instead of the target.
            response.headers['Location'] = location.replace(TARGET_URL, request.host_url, 1)
            return response

    content_type = resp.headers.get('Content-Type', '')
    proxy_base_url = request.host_url

    def generate():
        # Yield the (possibly rewritten) upstream body.
        if any(t in content_type for t in ['html', 'css', 'javascript']):
            # Text content must be fully buffered so URLs can be rewritten.
            content = resp.content
            rewritten_content = rewrite_text(content, content_type, proxy_base_url)
            if rewritten_content:
                yield (rewritten_content if isinstance(rewritten_content, bytes)
                       else rewritten_content.encode('utf-8'))
        else:
            # Binary content (images, media, ...) streams straight through.
            for chunk in resp.iter_content(chunk_size=8192):
                yield chunk

    return Response(stream_with_context(generate()), status=resp.status_code,
                    headers=response_headers)
|
||||
|
||||
if __name__ == '__main__':
    # Serve with waitress (a production WSGI server) instead of Flask's
    # built-in development server.
    bind_host, bind_port = '0.0.0.0', 8080
    print("Starting production server on http://0.0.0.0:8080")
    serve(app, host=bind_host, port=bind_port, threads=16)
|
|
@ -0,0 +1,6 @@
|
|||
flask
requests
python-dotenv
waitress
beautifulsoup4
lxml
|
Loading…
Reference in New Issue