fix: Invalid progress file path on Windows (#145)

2025-04-02 19:35:08 +08:00 · 2025-04-02 19:35:08 +08:00 · 9a914c1529
parent 50bbfce464
commit 9a914c1529
8 changed files with 147 additions and 9 deletions
--- a/.gitignore
+++ b/.gitignore
@ -13,6 +13,7 @@ llama.cpp
 *.ipynb
 data/db/*
 data/chroma_db/*
+data/
 lpm_kernel/L2/base_model/
 lpm_kernel/L2/data_pipeline/output/
 lpm_kernel/L2/data_pipeline/graphrag_indexing/cache/
--- a/Dockerfile.backend.apple
+++ b/Dockerfile.backend.apple
@ -0,0 +1,99 @@
+FROM --platform=linux/arm64 python:3.12-bullseye
+
+# Set working directory
+WORKDIR /app
+
+# 1. Install build tools and utilities plus the SQLite/Tcl/Tk development headers, then drop the apt caches within the same RUN step
+RUN apt-get update && apt-get install -y \
+    build-essential cmake git curl wget lsof vim unzip \
+    libsqlite3-dev tcl-dev tk-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# 2. Build SQLite (autoconf tarball 3490100) from source with FTS5 enabled, install it under /usr/local, delete the sources, and refresh the linker cache
+RUN wget https://www.sqlite.org/2025/sqlite-autoconf-3490100.tar.gz \
+    && tar xzf sqlite-autoconf-3490100.tar.gz \
+    && cd sqlite-autoconf-3490100 \
+    && ./configure --enable-fts5   --prefix=/usr/local \
+    && make -j$(nproc) \
+    && make install \
+    && cd .. \
+    && rm -rf sqlite-autoconf-3490100* \
+    && ldconfig
+
+# 3. Point later native builds and the runtime loader at /usr/local; the ':' separator is added only when LD_LIBRARY_PATH already has a value, because an empty trailing entry would put the current directory on the library search path
+ENV CFLAGS="-I/usr/local/include -DSQLITE_ENABLE_FTS5"
+ENV LDFLAGS="-L/usr/local/lib -lsqlite3"
+ENV LD_LIBRARY_PATH="/usr/local/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+
+# 4. Upgrade pip, install Poetry, and turn off Poetry's virtualenv creation
+RUN pip install --upgrade pip \
+    && pip install poetry \
+    && poetry config virtualenvs.create false
+
+# 5. Install pysqlite3 from source (--no-binary) instead of from a prebuilt wheel
+RUN pip install pysqlite3 --no-binary pysqlite3
+
+# 6. Fail the build unless the standard-library sqlite3 module reports SQLite 3.49.1
+RUN python -c "import sqlite3; print('SQLite version:', sqlite3.sqlite_version); assert sqlite3.sqlite_version.startswith('3.49.1'), 'Wrong SQLite version!'"
+
+# -----------------------------------------------------------
+# Project setup. NOTE(review): presumably mirrors the steps of the
+# default Dockerfile.backend - confirm the two files are kept in sync.
+# Create the dependency, data, log, run, and resource directories
+RUN mkdir -p /app/dependencies /app/data/sqlite /app/data/chroma_db /app/logs /app/run /app/resources
+
+# Copy the bundled graphrag and llama.cpp archives into the image
+COPY dependencies/graphrag-modified.tar.gz /app/dependencies/
+COPY dependencies/llama.cpp.zip /app/dependencies/
+
+# Unpack the bundled llama.cpp archive, build it with CMake, abort the build if bin/llama-server was not produced, and install that binary into /usr/local/bin
+RUN LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" \
+    && echo "Using local llama.cpp archive..." \
+    && unzip -q "$LLAMA_LOCAL_ZIP" \
+    && cd llama.cpp \
+    && mkdir -p build && cd build \
+    && cmake .. \
+    && cmake --build . --config Release \
+    && if [ ! -f "bin/llama-server" ]; then \
+         echo "Build failed: llama-server executable not found" && exit 1; \
+       else \
+         echo "Successfully built llama-server"; \
+       fi \
+    && cp bin/llama-server /usr/local/bin/ \
+    && chmod +x /usr/local/bin/llama-server \
+    && echo "Installed llama-server to /usr/local/bin/"
+
+# Copy the project manifest (pyproject.toml) and README
+COPY pyproject.toml README.md /app/
+# Upgrade packaging tools, pin spaCy 3.7.5, and install the bundled graphrag archive
+RUN pip install -U pip setuptools wheel
+RUN pip install --no-cache-dir spacy==3.7.5
+RUN pip install --force-reinstall dependencies/graphrag-modified.tar.gz
+# Replace any installed chromadb with version 0.4.24 installed from source (--no-binary)
+RUN pip uninstall -y chromadb \
+ && pip install chromadb==0.4.24 --no-binary chromadb --force-reinstall 
+# Install the dependencies declared in pyproject.toml without installing the project itself (--no-root)
+RUN poetry install --no-interaction --no-root
+
+# Copy the docker/ support files (read by the startup command) and the lpm_kernel package
+COPY docker/ /app/docker/
+COPY lpm_kernel/ /app/lpm_kernel/
+
+# Fail the build early if the lpm_kernel package cannot be imported
+RUN python -c "import lpm_kernel; print('Module import check passed')"
+
+# Runtime environment: unbuffered Python output, /app as import root, the data/log/run/resource directories created earlier, and the Flask application module
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app \
+    BASE_DIR=/app/data \
+    LOCAL_LOG_DIR=/app/logs \
+    RUN_DIR=/app/run \
+    RESOURCES_DIR=/app/resources \
+    APP_ROOT=/app \
+    FLASK_APP=lpm_kernel.app
+
+# Expose the Flask port (LOCAL_APP_PORT defaults to 8002) and port 8080 (NOTE(review): presumably the llama-server port - confirm)
+EXPOSE 8002 8080
+# Startup: initialize the SQLite database and ChromaDB collections when they are missing, then exec Flask so it replaces the shell and receives container stop signals directly
+CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && exec python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]
--- a/18
+++ b/18
@ -11,6 +11,19 @@

 .PHONY: install test format lint all setup start stop restart restart-backend restart-force help check-conda check-env docker-build docker-up docker-down docker-build-backend docker-build-frontend docker-restart-backend docker-restart-frontend docker-restart-all

+# Apple Silicon detection (silent; nothing is echoed at parse time).
+# APPLE_SILICON is 1 only when the host kernel is Darwin and the
+# machine type is arm64; on every other host it is 0.
+ifeq ($(shell uname -s)/$(shell uname -m),Darwin/arm64)
+  APPLE_SILICON := 1
+  # Pattern-specific variable: recipes of every docker-* target
+  # run with PLATFORM=apple exported into their environment.
+  docker-%: export PLATFORM=apple
+else
+  # Any other OS/architecture pair: PLATFORM is left untouched.
+  APPLE_SILICON := 0
+endif
+
 # Show help message
 help:
 	@echo "\033[0;36m"
@ -53,6 +66,11 @@ help:
 	@echo "  make format                - Format code"
 	@echo "  make lint                  - Check code style"
 	@echo "  make all                   - Run format, lint and test"
+	@if [ "$(APPLE_SILICON)" = "1" ]; then \
+		echo ""; \
+		echo "\033[1;32m▶ PLATFORM INFORMATION:\033[0m"; \
+		echo "  Apple Silicon detected - Docker commands will use PLATFORM=apple"; \
+	fi

 # Check if in conda environment
 check-conda:
--- a/README.md
+++ b/README.md
@ -198,6 +198,12 @@ make docker-restart-backend
 make docker-restart-frontend
 ```

+- Note: on Apple Silicon, if you run Docker commands directly instead of through `make`, you must set the `PLATFORM` environment variable to `apple` yourself. For example:
+```bash
+PLATFORM=apple docker-compose up -d --build
+```
+
+
 </details>


--- a/dependencies/graphrag-modified.tar.gz
+++ b/dependencies/graphrag-modified.tar.gz
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -4,7 +4,7 @@ services:
  backend:
    build:
      context: .
-      dockerfile: Dockerfile.backend
+      dockerfile: Dockerfile.backend${PLATFORM:+.${PLATFORM}}
    container_name: second-me-backend
    restart: unless-stopped
    ports:
@ -21,6 +21,7 @@ services:
      # Environment variables
      - LOCAL_APP_PORT=8002
      - IN_DOCKER_ENV=1
+      - PLATFORM=${PLATFORM:-linux}
    extra_hosts:
      - "host.docker.internal:host-gateway"
    deploy:
--- a/lpm_kernel/L2/train_for_user.sh
+++ b/lpm_kernel/L2/train_for_user.sh
@ -1,2 +1,8 @@
-python lpm_kernel/L2/train.py  --seed 42  --model_name_or_path "${MODEL_BASE_PATH}"  --user_name "${USER_NAME}"  --dataset_name "resources/L2/data/merged.json"  --chat_template_format "chatml"  --add_special_tokens False  --append_concat_token False  --max_seq_length 512  --num_train_epochs 3  --save_total_limit 2  --logging_steps 20  --log_level "info"  --logging_strategy "steps"  --save_strategy "epoch"  --push_to_hub False  --bf16 True  --packing False  --learning_rate 2e-4  --lr_scheduler_type "cosine"  --weight_decay 1e-4  --max_grad_norm 0.3  --output_dir "${MODEL_PERSONAL_DIR}"  --per_device_train_batch_size 2  --gradient_accumulation_steps 1  --gradient_checkpointing True  --use_reentrant True  --use_peft_lora True  --lora_r 8  --lora_alpha 16  --lora_dropout 0.1  --lora_target_modules "all-linear"  --use_4bit_quantization False  --use_nested_quant False  --bnb_4bit_compute_dtype "bfloat16"
+set -- --seed 42  --model_name_or_path "${MODEL_BASE_PATH}"  --user_name "${USER_NAME}"  --dataset_name "resources/L2/data/merged.json"  --chat_template_format "chatml"  --add_special_tokens False  --append_concat_token False  --max_seq_length 512  --num_train_epochs 3  --save_total_limit 2  --logging_steps 20  --log_level "info"  --logging_strategy "steps"  --save_strategy "epoch"  --push_to_hub False  --packing False  --learning_rate 2e-4  --lr_scheduler_type "cosine"  --weight_decay 1e-4  --max_grad_norm 0.3  --output_dir "${MODEL_PERSONAL_DIR}"  --per_device_train_batch_size 2  --gradient_accumulation_steps 1  --gradient_checkpointing True  --use_reentrant True  --use_peft_lora True  --lora_r 8  --lora_alpha 16  --lora_dropout 0.1  --lora_target_modules "all-linear"  --use_4bit_quantization False  --use_nested_quant False  --bnb_4bit_compute_dtype "bfloat16"

+# The training arguments live in the positional parameters rather than a flat string, so values with spaces or glob characters stay single arguments; --bf16 is added on every platform except apple
+if [ "$PLATFORM" != "apple" ]; then
+  set -- "$@" --bf16 True
+fi
+# "$@" hands each stored argument to train.py as its own word
+python lpm_kernel/L2/train.py "$@"
--- a/lpm_kernel/L2/utils.py
+++ b/lpm_kernel/L2/utils.py
@ -161,13 +161,20 @@ def create_and_prepare_model(args, data_args, training_args):
            load_in_4bit=args.use_4bit_quantization,
        )
    else:
-        model = AutoModelForCausalLM.from_pretrained(
-            args.model_name_or_path,
-            quantization_config=bnb_config,
-            trust_remote_code=True,
-            attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
-            torch_dtype=torch.bfloat16,
-        )
+        if os.getenv("PLATFORM") != "apple":
+            model = AutoModelForCausalLM.from_pretrained(
+                args.model_name_or_path,
+                quantization_config=bnb_config,
+                trust_remote_code=True,
+                attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
+                torch_dtype=torch.bfloat16
+            )
+        else:
+            model = AutoModelForCausalLM.from_pretrained(
+                args.model_name_or_path,
+                quantization_config=bnb_config,
+                trust_remote_code=True
+            )

    peft_config = None
    chat_template = None