diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..0d1dd24
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,11 @@
+.git
+.gitignore
+.venv
+__pycache__
+*.pyc
+*.pyo
+.DS_Store
+node_modules
+data
+videos
+*.log
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..2e8d7a4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,42 @@
+FROM python:3.11-slim
+
+# Skip .pyc generation and keep stdout/stderr unbuffered so logs appear immediately.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# System deps kept lean to support torch/sentence-transformers wheels.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends build-essential git curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python deps before copying the source so this layer is reused
+# until requirements.txt changes.
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+# Unprivileged runtime user. --create-home provides a writable $HOME for
+# libraries that cache there (presumably the embedding-model download).
+RUN groupadd --gid 10001 app \
+    && useradd --uid 10001 --gid app --create-home app
+
+# Copy the package into /app/python_app so `python -m python_app.search_app` works.
+COPY . /app/python_app
+
+# Defaults mirror the docker-compose.yml service wiring; override at runtime as needed.
+ENV ELASTIC_URL=http://elasticsearch:9200 \
+    ELASTIC_INDEX=this_little_corner_py \
+    ELASTIC_VERIFY_CERTS=0 \
+    QDRANT_URL=http://qdrant:6333 \
+    QDRANT_COLLECTION=tlc-captions-full \
+    QDRANT_VECTOR_NAME= \
+    QDRANT_VECTOR_SIZE=1024 \
+    QDRANT_EMBED_MODEL=BAAI/bge-large-en-v1.5 \
+    LOCAL_DATA_DIR=/app/data/video_metadata
+
+EXPOSE 8080
+
+# WORKDIR is still /app from above, so the module path resolves without resetting it.
+USER app
+CMD ["python", "-m", "python_app.search_app"]
diff --git a/README.md b/README.md
index 126e74b..49ab704 100644
--- a/README.md
+++ b/README.md
@@ -85,3 +85,26 @@ Visit and you’ll see a barebones UI that:
 
 Feel free to expand on this scaffold—add proper logging, schedule transcript
 updates, or flesh out the UI—once you’re happy with the baseline behaviour.
+
+## Run with Docker Compose
+
+A quick single-node stack (app + Elasticsearch + Qdrant) is included:
+
+```bash
+docker compose build
+docker compose up
+```
+
+Services:
+- **app** (port 8080): Flask UI/API, embeds queries on demand (downloads the model on first run).
+- **elasticsearch** (port 9200): single node, security disabled for local use.
+- **qdrant** (port 6333): vector index used by `/vector-search`.
+
+Key environment wiring (see `docker-compose.yml` for defaults):
+- `ELASTIC_URL=http://elasticsearch:9200`
+- `ELASTIC_INDEX=this_little_corner_py`
+- `QDRANT_URL=http://qdrant:6333`
+- `QDRANT_COLLECTION=tlc-captions-full`
+- `LOCAL_DATA_DIR=/app/data/video_metadata` (mounted from `./data`)
+
+Mount `./data` (read-only) if you want local fallbacks for metrics; otherwise the app relies entirely on Elasticsearch/Qdrant. Stop the stack with `docker compose down` (add `-v` to clear ES/Qdrant volumes).
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..6c337a5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,60 @@
+# Single-node local stack: app + Elasticsearch + Qdrant.
+services:
+  app:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "8080:8080"
+    environment:
+      ELASTIC_URL: http://elasticsearch:9200
+      ELASTIC_INDEX: this_little_corner_py
+      ELASTIC_VERIFY_CERTS: "0"
+      QDRANT_URL: http://qdrant:6333
+      QDRANT_COLLECTION: tlc-captions-full
+      QDRANT_VECTOR_NAME: ""
+      QDRANT_VECTOR_SIZE: "1024"
+      QDRANT_EMBED_MODEL: BAAI/bge-large-en-v1.5
+      LOCAL_DATA_DIR: /app/data/video_metadata
+    volumes:
+      - ./data:/app/data:ro
+    depends_on:
+      # Wait for Elasticsearch to pass its health check, not just for its container to start.
+      elasticsearch:
+        condition: service_healthy
+      qdrant:
+        condition: service_started
+
+  elasticsearch:
+    image: docker.elastic.co/elasticsearch/elasticsearch:7.17.19
+    environment:
+      discovery.type: single-node
+      # Security is disabled for local use; the published port 9200 is unauthenticated.
+      xpack.security.enabled: "false"
+      ES_JAVA_OPTS: -Xms1g -Xmx1g
+      cluster.routing.allocation.disk.threshold_enabled: "false"
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    healthcheck:
+      test: ["CMD-SHELL", "curl -fsS 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=5s' || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 30
+      start_period: 30s
+    volumes:
+      - esdata:/usr/share/elasticsearch/data
+    ports:
+      - "9200:9200"
+
+  qdrant:
+    image: qdrant/qdrant:v1.9.1
+    ports:
+      - "6333:6333"
+    volumes:
+      - qdrant_storage:/qdrant/storage
+
+volumes:
+  esdata:
+  qdrant_storage: