From f36f898d019523c82885538c767c92af9086b9f7 Mon Sep 17 00:00:00 2001 From: kpcto Date: Fri, 31 Jan 2025 22:06:28 +0000 Subject: [PATCH] docker support implemented - readme updated --- README.md | 46 +++++++++++- docker/.dockerignore | 12 +++ docker/Dockerfile | 62 ++++++++++++++++ docker/README.md | 151 ++++++++++++++++++++++++++++++++++++++ docker/docker-compose.yml | 90 +++++++++++++++++++++++ 5 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 docker/.dockerignore create mode 100644 docker/Dockerfile create mode 100644 docker/README.md create mode 100644 docker/docker-compose.yml diff --git a/README.md b/README.md index 0462431..d6fdddd 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -# 🤖 Last-In AI: Your Papers Please! +# 🤖 Last-In AI: Your Papers Please! - Work in Progress [![Python](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/) [![arXiv](https://img.shields.io/badge/arXiv-2401.00000-b31b1b.svg)](https://arxiv.org) [![License](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE) +[![Docker](https://img.shields.io/badge/docker-ready-brightgreen.svg)](docker/README.md) > *"Because reading research papers manually is so 2023!"* 🎯 @@ -25,10 +26,17 @@ src/ ├── data_acquisition/ # Paper fetching wizardry 📥 ├── orchestration/ # The puppet master 🎭 └── processing/ # PDF wrestling championship 📄 + +docker/ # Container configuration 🐳 +├── Dockerfile # Multi-stage build definition +├── docker-compose.yml # Service orchestration +└── README.md # Docker setup documentation ``` ## 🚀 Getting Started +### Method 1: Local Installation + 1. Clone this repository (because good things should be shared) ```bash git clone https://github.com/yourusername/lastin-ai.git @@ -46,6 +54,27 @@ cp .env.example .env # Edit .env with your favorite text editor ``` +### Method 2: Docker Installation 🐳 + +1. 
Clone and navigate to the repository +```bash +git clone https://github.com/yourusername/lastin-ai.git +cd lastin-ai +``` + +2. Set up environment variables +```bash +cp .env.example .env +# Edit .env with your configuration +``` + +3. Build and run with Docker Compose +```bash +docker compose -f docker/docker-compose.yml up --build +``` + +For detailed Docker setup and configuration options, see [Docker Documentation](docker/README.md). + ## 🎮 Usage ```python @@ -59,6 +88,21 @@ controller.process_papers("quantum_computing") # Now go grab a coffee, you've earned it! ``` +## 🛠️ Development + +### Running Tests +```bash +# Local +python -m pytest + +# Docker +docker compose -f docker/docker-compose.yml run --rm app python -m pytest +``` + +### Environment Variables +- See `.env.example` for required configuration +- Docker configurations are documented in `docker/README.md` + ## 🤝 Contributing Found a bug? Want to add a feature? Have a brilliant idea? We're all ears! diff --git a/docker/.dockerignore b/docker/.dockerignore new file mode 100644 index 0000000..5f01340 --- /dev/null +++ b/docker/.dockerignore @@ -0,0 +1,12 @@ +.git +.env* +__pycache__ +*.pyc +.pytest_cache +.coverage +htmlcov +.vscode +*.log +cache/* +data/* +logs/* \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..4613b2e --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,62 @@ +# Build stage +FROM python:3.8-slim-bullseye AS builder + +# Install system dependencies needed to build Python packages (compiler toolchain, libpq headers) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + libpq-dev && \ + rm -rf /var/lib/apt/lists/* + +# Create non-root user; -m creates /home/appuser so `pip install --user` below can write ~/.local +RUN groupadd -r appuser && \ + useradd -r -m -g appuser appuser + +WORKDIR /app +COPY requirements.txt . 
+ +# Install dependencies as non-root user (lands in /home/appuser/.local, copied into the production stage) +USER appuser +RUN pip install --user --no-cache-dir -r requirements.txt + +# Production stage +FROM python:3.8-slim-bullseye AS production + +# Runtime dependencies (libpq5 shared library; curl is used by the HEALTHCHECK below) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libpq5 \ + curl && \ + rm -rf /var/lib/apt/lists/* + +# Create non-root user and a home directory to receive the packages from the builder stage +RUN groupadd -r appuser && \ + useradd -r -g appuser appuser && \ + mkdir -p /home/appuser/.local && \ + chown -R appuser:appuser /home/appuser + +WORKDIR /app + +# Copy application code. NOTE(review): docker/.dockerignore is not at the build-context root, so it may not apply here - confirm .env and .git are excluded +COPY --chown=appuser:appuser . . +COPY --from=builder --chown=appuser:appuser /home/appuser/.local /home/appuser/.local + +# Create and set permissions for runtime directories +RUN mkdir -p /app/cache /app/data /app/logs && \ + chown -R appuser:appuser /app + +USER appuser +ENV PATH=/home/appuser/.local/bin:$PATH +ENV PYTHONPATH=/app + +# Application environment variables +ENV CACHE_DIR=/app/cache \ + VECTOR_STORE_PATH=/app/data \ + LOG_PATH=/app/logs \ + PYTHONUNBUFFERED=1 + +# Health check (probes the /health endpoint on port 8000 with curl) +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +CMD ["python", "-m", "src.orchestration.agent_controller"] \ No newline at end of file diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..6bffad8 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,151 @@ +# 🐳 Docker Architecture for Last-In AI + +## Overview +This document outlines the containerization strategy for the Last-In AI application, including multi-stage builds, service dependencies, and security considerations. + +## Base Image Selection +```dockerfile +# Build stage +FROM python:3.8-slim-bullseye AS builder +# Slim-bullseye chosen for: +# - Minimal size while maintaining compatibility +# - Security updates and stability +# - Python 3.8+ compatibility +``` + +## Multi-Stage Build Strategy +1. 
**Builder Stage** + - Install build dependencies + - Install Python packages + - Compile any necessary components + +2. **Production Stage** + - Copy only necessary files from builder + - Minimal runtime dependencies + - Non-root user setup + +## Dependencies and Requirements +- Python packages from requirements.txt +- System dependencies: + - build-essential (for some Python packages) + - libpq-dev (for PostgreSQL) + - Optional: tesseract-ocr (for PDF processing) + +## Directory Structure +``` +/app +├── src/ # Application code +├── config/ # Configuration files +├── cache/ # Paper cache directory +├── data/ # Vector store data +└── logs/ # Application logs +``` + +## Environment Configuration +- Source: .env.example +- Runtime variables: + - Database credentials + - API keys + - Redis configuration + - Storage paths + - Security settings + +## Service Dependencies +1. **PostgreSQL** + - Primary database + - Persistent volume for data + - Environment: POSTGRES_* variables + +2. **Redis** + - Caching layer + - Port: 6379 + - RDB snapshots (`--save 60 1`) persisted to the `redis_data` volume + +## Security Considerations +1. Non-root user execution +2. Secret management via Docker secrets +3. Read-only filesystem where possible +4. Minimal base image +5. Regular security updates +6. Proper file permissions + +## Docker Compose Configuration +Services: +1. Main application +2. PostgreSQL database +3. Redis cache +4. Optional: Monitoring + +## Resource Management +- Memory limits +- CPU allocation +- Volume mounts for: + - Paper cache + - Vector store + - Logs + +## Health Checks +```dockerfile +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 +``` + +## Build and Run Commands +```bash +# Build (run from the repository root) +docker build --file docker/Dockerfile --tag lastin-ai:prod --target production . 
+ +# Run +docker run -p 8000:8000 \ + --env-file .env.prod \ + --volume ./cache:/app/cache \ + --volume ./data:/app/data \ + --volume ./logs:/app/logs \ + lastin-ai:prod +``` + +## File Exclusions (.dockerignore) +``` +.git +.env* +__pycache__ +*.pyc +.pytest_cache +.coverage +htmlcov +.vscode +*.log +cache/* +data/* +logs/* +``` + +## Implementation Steps +1. Switch to Code mode +2. Create Dockerfile +3. Create docker-compose.yml +4. Create .dockerignore +5. Test build and deployment +6. Implement health checks +7. Configure monitoring + +## Security Hardening Steps +1. Implement least privilege principle +2. Regular dependency updates +3. Image vulnerability scanning +4. Secrets management +5. Network security policies + +## Recommendations +1. Use multi-stage builds for minimal production image +2. Implement proper logging configuration +3. Regular security audits +4. Backup strategy for persistent data +5. Monitoring and alerting setup + +This containerization strategy ensures: +- Efficient builds +- Secure runtime +- Scalable deployment +- Proper resource management +- Easy maintenance \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..3ac07dc --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,90 @@ +version: '3.8' + +services: + app: + build: + context: .. 
+ dockerfile: docker/Dockerfile # path is relative to the build context (repo root) + target: production + command: python -m src.orchestration.agent_controller + env_file: + - ../.env + ports: + - "8000:8000" + volumes: + - ../cache:/app/cache + - ../data:/app/data + - ../logs:/app/logs + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_healthy # wait for the redis healthcheck below, same as postgres + environment: + - PYTHONPATH=/app + - PYTHONUNBUFFERED=1 + networks: + - lastin_network + deploy: + resources: + limits: + cpus: '1' + memory: 2G + reservations: + memory: 1G + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 5s + restart: unless-stopped + + postgres: + image: postgres:15-alpine + volumes: + - postgres_data:/var/lib/postgresql/data + environment: + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB} + networks: + - lastin_network + deploy: + resources: + limits: + cpus: '1' + memory: 1G + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"] + interval: 5s + timeout: 5s + retries: 5 + restart: unless-stopped + + redis: + image: redis:7-alpine + volumes: + - redis_data:/data # holds the RDB snapshots written by --save + command: redis-server --save 60 1 --loglevel warning + networks: + - lastin_network + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 5s + retries: 5 + restart: unless-stopped + +networks: + lastin_network: + driver: bridge + +volumes: + postgres_data: + redis_data: \ No newline at end of file