# Second-Me/docker-compose-gpu.yml

services:
  # GPU-enabled backend service. Built from Dockerfile.backend.cuda unless
  # DOCKER_BACKEND_DOCKERFILE names a different Dockerfile.
  backend:
    container_name: second-me-backend
    build:
      context: .
      dockerfile: ${DOCKER_BACKEND_DOCKERFILE:-Dockerfile.backend.cuda}
    restart: unless-stopped
    networks:
      - second-me-network
    extra_hosts:
      # Lets the container reach the Docker host as host.docker.internal.
      - "host.docker.internal:host-gateway"
    ports:
      - "8002:8002"
      - "8080:8080"
    environment:
      # Mapping form; values are quoted so they stay strings.
      LOCAL_APP_PORT: "8002"
      IN_DOCKER_ENV: "1"
      # Falls back to "linux" when PLATFORM is unset or empty on the host.
      PLATFORM: "${PLATFORM:-linux}"
      USE_CUDA: "1"
    volumes:
      # Host paths bind-mounted under /app.
      - ./data:/app/data
      - ./logs:/app/logs
      - ./run:/app/run
      - ./resources:/app/resources
      - ./docker:/app/docker
      - ./.env:/app/.env
      # Named volume that persists the llama.cpp build directory.
      - llama-cpp-build:/app/llama.cpp/build
    deploy:
      resources:
        limits:
          # Upper bound on container memory: 64 GB.
          memory: 64G
        reservations:
          # Memory reserved for the container: 6 GB.
          memory: 6G
          # Reserve every NVIDIA GPU available on the host.
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu

  # Frontend service, built from Dockerfile.frontend and started after the
  # backend container.
  frontend:
    container_name: second-me-frontend
    build:
      context: .
      dockerfile: Dockerfile.frontend
    restart: unless-stopped
    depends_on:
      - backend
    networks:
      - second-me-network
    ports:
      - "3000:3000"
    environment:
      # Points at the backend by its Compose service name on port 8002.
      VITE_API_BASE_URL: "http://backend:8002"
    volumes:
      # Host paths bind-mounted under /app.
      - ./logs:/app/logs
      - ./resources:/app/resources
    deploy:
      resources:
        limits:
          # Upper bound on container memory: 2 GB.
          memory: 2G
        reservations:
          # Memory reserved for the container: 1 GB.
          memory: 1G

# Bridge network that the services in this file attach to.
networks:
  second-me-network:
    driver: bridge

# Named volume (local driver) used to persist the llama.cpp build directory.
volumes:
  llama-cpp-build:
    driver: local