fix: Invalid progress file path on Windows (#145)

This commit is contained in:
GoForceX 2025-04-02 19:35:08 +08:00 committed by Yong Wei
parent 50bbfce464
commit 9a914c1529
8 changed files with 147 additions and 9 deletions

1
.gitignore vendored
View File

@ -13,6 +13,7 @@ llama.cpp
*.ipynb
data/db/*
data/chroma_db/*
data/
lpm_kernel/L2/base_model/
lpm_kernel/L2/data_pipeline/output/
lpm_kernel/L2/data_pipeline/graphrag_indexing/cache/

99
Dockerfile.backend.apple Normal file
View File

@ -0,0 +1,99 @@
# Backend image for Apple Silicon hosts: pinned to linux/arm64 on Debian bullseye.
FROM --platform=linux/arm64 python:3.12-bullseye
# Set working directory; relative paths in later instructions resolve against /app
WORKDIR /app
# 1. Install system dependencies (including SQLite compilation dependencies).
#    --no-install-recommends keeps optional "recommended" packages out of the image,
#    and the apt lists are deleted in the same layer so they never persist.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        libsqlite3-dev \
        lsof \
        tcl-dev \
        tk-dev \
        unzip \
        vim \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# 2. Build a newer SQLite (FTS5 enabled) from the autoconf tarball and install it
#    under /usr/local before anything else is compiled against SQLite.
#    The source tree and tarball are removed in the same layer.
RUN wget https://www.sqlite.org/2025/sqlite-autoconf-3490100.tar.gz \
    && tar xzf sqlite-autoconf-3490100.tar.gz \
    && ( \
        cd sqlite-autoconf-3490100 \
        && ./configure --enable-fts5 --prefix=/usr/local \
        && make -j"$(nproc)" \
        && make install \
    ) \
    && rm -rf sqlite-autoconf-3490100* \
    && ldconfig
# 3. Configure Python compilation environment so later native builds compile and
#    link against the SQLite installed under /usr/local.
#    LD_LIBRARY_PATH is set to the single directory on purpose: nothing defines it
#    earlier in this file, so appending ":$LD_LIBRARY_PATH" would leave a trailing
#    empty entry, which the dynamic loader treats as the current working directory.
ENV CFLAGS="-I/usr/local/include -DSQLITE_ENABLE_FTS5" \
    LDFLAGS="-L/usr/local/lib -lsqlite3" \
    LD_LIBRARY_PATH="/usr/local/lib"
# 4. Configure Python environment: upgrade pip, install Poetry, and tell Poetry not
#    to create a virtualenv.
#    --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir poetry \
    && poetry config virtualenvs.create false
# 5. Force source code compilation of pysqlite3 (no prebuilt wheel). The build runs
#    with the CFLAGS/LDFLAGS environment from step 3 in effect.
#    --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir pysqlite3 --no-binary pysqlite3
# 6. Verify SQLite version: the build fails unless Python reports SQLite 3.49.1.
#    NOTE(review): this imports the standard-library sqlite3 module, not the
#    pysqlite3 package built in step 5 — confirm that is the intended check.
RUN python -c "import sqlite3; print('SQLite version:', sqlite3.sqlite_version); assert sqlite3.sqlite_version.startswith('3.49.1'), 'Wrong SQLite version!'"
# -----------------------------------------------------------
# Everything below keeps the original project configuration unchanged.
# Create the directory layout used by the application
RUN mkdir -p \
        /app/dependencies \
        /app/data/sqlite \
        /app/data/chroma_db \
        /app/logs \
        /app/run \
        /app/resources
# Copy the bundled dependency archives
COPY dependencies/graphrag-modified.tar.gz dependencies/llama.cpp.zip /app/dependencies/
# Build llama.cpp from the bundled zip archive, fail the build if the
# llama-server binary was not produced, then install it into /usr/local/bin.
RUN archive="dependencies/llama.cpp.zip" \
    && echo "Using local llama.cpp archive..." \
    && unzip -q "$archive" \
    && cd llama.cpp \
    && mkdir -p build && cd build \
    && cmake .. \
    && cmake --build . --config Release \
    && { [ -f "bin/llama-server" ] || { echo "Build failed: llama-server executable not found"; exit 1; }; } \
    && echo "Successfully built llama-server" \
    && cp bin/llama-server /usr/local/bin/ \
    && chmod +x /usr/local/bin/llama-server \
    && echo "Installed llama-server to /usr/local/bin/"
# Copy project configuration
COPY pyproject.toml README.md /app/
# Install Python dependencies. Every pip call uses --no-cache-dir so pip's
# download/wheel cache is not stored in the image layers.
RUN pip install --no-cache-dir -U pip setuptools wheel
RUN pip install --no-cache-dir spacy==3.7.5
# Install the bundled graphrag archive copied into /app/dependencies earlier
RUN pip install --no-cache-dir --force-reinstall dependencies/graphrag-modified.tar.gz
# Replace any chromadb already present with 0.4.24 built from source
RUN pip uninstall -y chromadb \
    && pip install --no-cache-dir chromadb==0.4.24 --no-binary chromadb --force-reinstall
# Install the remaining dependencies declared in pyproject.toml (not the project itself)
RUN poetry install --no-interaction --no-root
# Copy source code (destinations are relative to WORKDIR /app)
COPY docker/ ./docker/
COPY lpm_kernel/ ./lpm_kernel/
# Smoke test: the lpm_kernel package must be importable inside the image
RUN python -c "import lpm_kernel; print('Module import check passed')"
# Runtime environment: application root and directories, Flask entry module,
# Python import path, and unbuffered Python output
ENV APP_ROOT=/app \
    BASE_DIR=/app/data \
    LOCAL_LOG_DIR=/app/logs \
    RUN_DIR=/app/run \
    RESOURCES_DIR=/app/resources \
    FLASK_APP=lpm_kernel.app \
    PYTHONPATH=/app \
    PYTHONUNBUFFERED=1
# Ports documented for this image
EXPOSE 8002
EXPOSE 8080
# Container start-up sequence (a single bash -c chain; each step runs only if the previous one succeeded):
#   1. If /app/data/sqlite/lpm.db is missing or empty, create it from
#      /app/docker/sqlite/init.sql with the sqlite3 CLI and list the tables.
#   2. If either /app/data/chroma_db/documents or /app/data/chroma_db/document_chunks
#      is missing, run /app/docker/app/init_chroma.py.
#   3. Append a start marker to /app/logs/backend.log, then run Flask on 0.0.0.0 at
#      port LOCAL_APP_PORT (default 8002), appending stdout and stderr to the same log.
CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]

View File

@ -11,6 +11,19 @@
.PHONY: install test format lint all setup start stop restart restart-backend restart-force help check-conda check-env docker-build docker-up docker-down docker-build-backend docker-build-frontend docker-restart-backend docker-restart-frontend docker-restart-all
# Detect Apple Silicon (Darwin kernel on an arm64 machine) without printing anything.
# APPLE_SILICON starts at 0 and is switched to 1 only when both checks match.
APPLE_SILICON := 0
ifeq ($(shell uname -s)-$(shell uname -m),Darwin-arm64)
APPLE_SILICON := 1
# Export PLATFORM=apple to the recipes of every docker-* target
docker-%: export PLATFORM=apple
endif
# Show help message
help:
@echo "\033[0;36m"
@ -53,6 +66,11 @@ help:
@echo " make format - Format code"
@echo " make lint - Check code style"
@echo " make all - Run format, lint and test"
@if [ "$(APPLE_SILICON)" = "1" ]; then \
echo ""; \
echo "\033[1;32m▶ PLATFORM INFORMATION:\033[0m"; \
echo " Apple Silicon detected - Docker commands will use PLATFORM=apple"; \
fi
# Check if in conda environment
check-conda:

View File

@ -198,6 +198,12 @@ make docker-restart-backend
make docker-restart-frontend
```
- Please note that if you are using Apple Silicon and want to run Docker commands directly, you need to set the `PLATFORM` environment variable to `apple`. For example:
```bash
PLATFORM=apple docker-compose up -d --build
```
</details>

BIN
dependencies/graphrag-modified.tar.gz vendored Normal file

Binary file not shown.

View File

@ -4,7 +4,7 @@ services:
backend:
build:
context: .
dockerfile: Dockerfile.backend
dockerfile: Dockerfile.backend${PLATFORM:+.${PLATFORM}}
container_name: second-me-backend
restart: unless-stopped
ports:
@ -21,6 +21,7 @@ services:
# Environment variables
- LOCAL_APP_PORT=8002
- IN_DOCKER_ENV=1
- PLATFORM=${PLATFORM:-linux}
extra_hosts:
- "host.docker.internal:host-gateway"
deploy:

View File

@ -1,2 +1,8 @@
python lpm_kernel/L2/train.py --seed 42 --model_name_or_path "${MODEL_BASE_PATH}" --user_name "${USER_NAME}" --dataset_name "resources/L2/data/merged.json" --chat_template_format "chatml" --add_special_tokens False --append_concat_token False --max_seq_length 512 --num_train_epochs 3 --save_total_limit 2 --logging_steps 20 --log_level "info" --logging_strategy "steps" --save_strategy "epoch" --push_to_hub False --bf16 True --packing False --learning_rate 2e-4 --lr_scheduler_type "cosine" --weight_decay 1e-4 --max_grad_norm 0.3 --output_dir "${MODEL_PERSONAL_DIR}" --per_device_train_batch_size 2 --gradient_accumulation_steps 1 --gradient_checkpointing True --use_reentrant True --use_peft_lora True --lora_r 8 --lora_alpha 16 --lora_dropout 0.1 --lora_target_modules "all-linear" --use_4bit_quantization False --use_nested_quant False --bnb_4bit_compute_dtype "bfloat16"
# Build the training arguments as positional parameters ("$@") instead of a flat
# string, so values containing spaces or glob characters (model path, user name,
# output directory) reach train.py as single, unmodified arguments.
# NOTE: `set --` replaces this script's own positional parameters; the script does
# not read them anywhere else.
set -- \
  --seed 42 \
  --model_name_or_path "${MODEL_BASE_PATH}" \
  --user_name "${USER_NAME}" \
  --dataset_name resources/L2/data/merged.json \
  --chat_template_format chatml \
  --add_special_tokens False \
  --append_concat_token False \
  --max_seq_length 512 \
  --num_train_epochs 3 \
  --save_total_limit 2 \
  --logging_steps 20 \
  --log_level info \
  --logging_strategy steps \
  --save_strategy epoch \
  --push_to_hub False \
  --packing False \
  --learning_rate 2e-4 \
  --lr_scheduler_type cosine \
  --weight_decay 1e-4 \
  --max_grad_norm 0.3 \
  --output_dir "${MODEL_PERSONAL_DIR}" \
  --per_device_train_batch_size 2 \
  --gradient_accumulation_steps 1 \
  --gradient_checkpointing True \
  --use_reentrant True \
  --use_peft_lora True \
  --lora_r 8 \
  --lora_alpha 16 \
  --lora_dropout 0.1 \
  --lora_target_modules all-linear \
  --use_4bit_quantization False \
  --use_nested_quant False \
  --bnb_4bit_compute_dtype bfloat16
# Add BF16 option based on the platform: it is omitted when PLATFORM is "apple"
if [ "$PLATFORM" != "apple" ]; then
  set -- "$@" --bf16 True
fi
python lpm_kernel/L2/train.py "$@"

View File

@ -161,13 +161,20 @@ def create_and_prepare_model(args, data_args, training_args):
load_in_4bit=args.use_4bit_quantization,
)
else:
model = AutoModelForCausalLM.from_pretrained(
args.model_name_or_path,
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
torch_dtype=torch.bfloat16,
)
if os.getenv("PLATFORM") != "apple":
model = AutoModelForCausalLM.from_pretrained(
args.model_name_or_path,
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
torch_dtype=torch.bfloat16
)
else:
model = AutoModelForCausalLM.from_pretrained(
args.model_name_or_path,
quantization_config=bnb_config,
trust_remote_code=True
)
peft_config = None
chat_template = None