# copy-ofesfsedwa/main.py

import os
import requests
import logging
import re
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
from flask import Flask, Response, request, stream_with_context
from dotenv import load_dotenv
from waitress import serve
# Load environment variables from .env file
load_dotenv()
app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
# Get target URL from environment variables
TARGET_URL = os.environ.get('TARGET_URL')
if not TARGET_URL:
    raise ValueError("TARGET_URL is not set in the .env file")
# Normalize to a trailing slash so string replacement against request.host_url
# (which always ends with '/') lines up cleanly.
if not TARGET_URL.endswith('/'):
    TARGET_URL += '/'
TARGET_PARSED = urlparse(TARGET_URL)
TARGET_HOST = TARGET_PARSED.netloc
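# Example .env contents (hypothetical value; any origin reachable from this
# host works):
#   TARGET_URL=https://example.com/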
def rewrite_text(content, content_type, proxy_base_url):
    """Rewrites URLs in text-based content (HTML, CSS, JS) to point at the proxy."""
    if not content:
        return content
    if 'text/html' in content_type:
        soup = BeautifulSoup(content, 'lxml')
        # Rewrite links in tags like a, link, script, img, and form.
        for tag, attr in [('a', 'href'), ('link', 'href'), ('script', 'src'), ('img', 'src'), ('form', 'action')]:
            for t in soup.find_all(tag, **{attr: True}):
                original_url = t[attr]
                absolute_url = urljoin(TARGET_URL, original_url)
                if urlparse(absolute_url).netloc == TARGET_HOST:
                    t[attr] = absolute_url.replace(TARGET_URL, proxy_base_url, 1)

        def rewrite_style_url(m):
            # Strip optional quotes around the captured URL before resolving it.
            raw = m.group(1).strip('\'"')
            return f"url({urljoin(TARGET_URL, raw).replace(TARGET_URL, proxy_base_url, 1)})"

        # Rewrite url(...) references in inline style attributes.
        for tag in soup.find_all(style=True):
            tag['style'] = re.sub(r'url\((.*?)\)', rewrite_style_url, tag['style'])
        return str(soup)
    elif 'css' in content_type or 'javascript' in content_type:
        # A simpler regex-based pass for CSS/JS URL rewriting.
        content_str = content.decode('utf-8', errors='ignore')

        def replace_url(match):
            # Group 2 captures the URL from url(...); group 4 the one from @import.
            url = match.group(2) if match.group(2) is not None else match.group(4)
            if not url or url.startswith(('http://', 'https://', '//', 'data:')):
                return match.group(0)  # Leave absolute and data: URLs untouched
            absolute_url = urljoin(TARGET_URL, url)
            return match.group(0).replace(url, absolute_url.replace(TARGET_URL, proxy_base_url, 1))

        # Match url(...) with optional quotes, and @import "..." statements.
        rewritten_content = re.sub(r'url\((["\']?)(.*?)\1\)|@import\s+(["\'])(.*?)\3', replace_url, content_str)
        return rewritten_content.encode('utf-8')
    return content
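# Illustrative example (hypothetical values): with TARGET_URL=https://example.com/
# and the proxy served at http://localhost:8080/, an anchor in fetched HTML like
#   <a href="/docs/page.html">
# resolves to https://example.com/docs/page.html and is rewritten to
#   <a href="http://localhost:8080/docs/page.html">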
@app.after_request
def add_cors_headers(response):
    # Browsers reject the wildcard origin '*' when credentials are allowed,
    # so echo the caller's Origin header when one is present.
    origin = request.headers.get('Origin')
    response.headers['Access-Control-Allow-Origin'] = origin or '*'
    response.headers['Access-Control-Allow-Methods'] = 'GET,POST,PUT,DELETE,PATCH,OPTIONS'
    response.headers['Access-Control-Allow-Headers'] = 'Content-Type,Authorization,Origin,Accept,User-Agent,Cookie'
    response.headers['Access-Control-Allow-Credentials'] = 'true'
    return response
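# Illustrative effect (hypothetical origin): a browser request carrying
#   Origin: http://localhost:3000
# gets that exact value echoed back in Access-Control-Allow-Origin, which is
# what credentialed CORS requires; requests without an Origin header fall
# back to the wildcard.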
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
def proxy(path):
    # Construct the full target URL, preserving any query string
    target_url = urljoin(TARGET_URL, path)
    if request.query_string:
        target_url += '?' + request.query_string.decode('utf-8')
    # Prepare headers for the outgoing request; Host must match the target
    headers = {key: value for key, value in request.headers if key.lower() != 'host'}
    headers['Host'] = TARGET_HOST
    headers['Referer'] = TARGET_URL
    headers['Origin'] = TARGET_URL.rstrip('/')
    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            stream=True,
            allow_redirects=False,  # Redirects are handled manually below
            timeout=30
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Proxy error connecting to {target_url}: {e}")
        return "Proxy connection error", 502
    # Handle redirects so the client stays on the proxy
    if resp.status_code in (301, 302, 303, 307, 308):
        location = resp.headers.get('Location')
        if location:
            # Rewrite the redirect location to point back to the proxy
            proxy_location = location.replace(TARGET_URL, request.host_url, 1)
            response = Response(status=resp.status_code)
            response.headers['Location'] = proxy_location
            return response
    # Filter out hop-by-hop and length headers that no longer apply
    excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
    response_headers = [(name, value) for name, value in resp.headers.items() if name.lower() not in excluded_headers]
    content_type = resp.headers.get('Content-Type', '')
    proxy_base_url = request.host_url

    def generate():
        # Text content is buffered in full so URLs can be rewritten before sending
        if any(t in content_type for t in ['html', 'css', 'javascript']):
            content = resp.content  # Reads the whole body into memory
            rewritten_content = rewrite_text(content, content_type, proxy_base_url)
            if rewritten_content:
                yield rewritten_content if isinstance(rewritten_content, bytes) else rewritten_content.encode('utf-8')
        else:
            # Other content (images, etc.) is streamed through directly
            for chunk in resp.iter_content(chunk_size=8192):
                yield chunk

    response = Response(stream_with_context(generate()), status=resp.status_code, headers=response_headers)
    return response
if __name__ == '__main__':
    print("Starting production server on http://0.0.0.0:8080")
    serve(app, host='0.0.0.0', port=8080, threads=16)
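# Quick smoke test (hypothetical paths; host/port match the serve() call above):
#   curl -i http://localhost:8080/
#   curl -i -X POST http://localhost:8080/api/items \
#        -H 'Content-Type: application/json' -d '{"name": "example"}'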