TLC-Search/config.py
2026-01-08 15:24:05 -05:00

127 lines
3.9 KiB
Python

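"""
Centralised configuration helpers for the Python search toolkit.

Values are read from the process environment; surrounding whitespace is
stripped and a blank value is treated as unset.

Environment Variables:
    ELASTIC_URL: Base URL of the Elasticsearch node (default: http://localhost:9200).
    ELASTIC_USERNAME / ELASTIC_PASSWORD: Optional basic auth credentials.
    ELASTIC_API_KEY: Optional Elasticsearch API key.
    ELASTIC_INDEX: Target index name (default: this_little_corner_py).
    ELASTIC_CA_CERT: Optional path to a CA certificate ("~" is expanded).
    ELASTIC_VERIFY_CERTS: Set to 0 or false to turn the flag off (default: 1).
    ELASTIC_DEBUG: Set to 1 or true to turn the flag on (default: 0).
    LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
    YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
    RATE_LIMIT_ENABLED: Toggle API rate limiting (default: 1).
    RATE_LIMIT_REQUESTS: Max requests per window per client (default: 60).
    RATE_LIMIT_WINDOW_SECONDS: Window size in seconds (default: 60).
    QDRANT_URL: Base URL of the Qdrant server (default: http://localhost:6333).
    QDRANT_COLLECTION: Qdrant collection name (default: tlc_embeddings).
    QDRANT_VECTOR_NAME: Optional vector name (default: unset).
    QDRANT_VECTOR_SIZE: Vector size as an integer (default: 1024).
    QDRANT_EMBED_MODEL: Embedding model identifier (default: BAAI/bge-large-en-v1.5).
"""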
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
# Load variables from a ``.env`` file located next to this module, if the
# optional python-dotenv package is installed.
#
# Only the import is guarded: an ImportError raised for any other reason while
# loading the file is no longer mistaken for "python-dotenv not installed".
try:
    from dotenv import load_dotenv
except ImportError:
    pass  # python-dotenv not installed
else:
    import logging

    _logger = logging.getLogger(__name__)
    _env_path = Path(__file__).parent / ".env"
    if _env_path.exists():
        _logger.info("Loading .env from: %s", _env_path)
        # override=True lets entries in .env take precedence over variables
        # that are already present in the process environment.
        result = load_dotenv(_env_path, override=True)
        _logger.info("load_dotenv result: %s", result)
@dataclass(frozen=True)
class ElasticSettings:
    """Immutable Elasticsearch connection settings.

    ``load_config`` fills each field from the ``ELASTIC_*`` environment
    variable named in the comment beside it.
    """

    url: str  # ELASTIC_URL
    username: Optional[str]  # ELASTIC_USERNAME; None when not provided
    password: Optional[str]  # ELASTIC_PASSWORD; None when not provided
    index: str  # ELASTIC_INDEX
    ca_cert: Optional[Path]  # ELASTIC_CA_CERT; None when not provided
    verify_certs: bool  # ELASTIC_VERIFY_CERTS
    api_key: Optional[str]  # ELASTIC_API_KEY; None when not provided
    debug: bool  # ELASTIC_DEBUG
@dataclass(frozen=True)
class DataSettings:
    """Immutable settings describing where local data lives."""

    root: Path  # LOCAL_DATA_DIR, or the built-in default chosen by load_config
@dataclass(frozen=True)
class YoutubeSettings:
    """Immutable settings for YouTube access."""

    api_key: Optional[str]  # YOUTUBE_API_KEY; None when not provided
@dataclass(frozen=True)
class RateLimitSettings:
    """Immutable API rate-limiting settings."""

    enabled: bool  # RATE_LIMIT_ENABLED
    requests: int  # RATE_LIMIT_REQUESTS: max requests per window per client
    window_seconds: int  # RATE_LIMIT_WINDOW_SECONDS: window size in seconds
@dataclass(frozen=True)
class AppConfig:
    """Immutable top-level configuration returned by ``load_config``.

    Groups the per-subsystem settings objects together with the flat
    ``QDRANT_*`` values.
    """

    # Per-subsystem settings groups.
    elastic: ElasticSettings
    data: DataSettings
    youtube: YoutubeSettings
    rate_limit: RateLimitSettings

    # Qdrant values, each read from the environment variable noted.
    qdrant_url: str  # QDRANT_URL
    qdrant_collection: str  # QDRANT_COLLECTION
    qdrant_vector_name: Optional[str]  # QDRANT_VECTOR_NAME; None when not provided
    qdrant_vector_size: int  # QDRANT_VECTOR_SIZE
    qdrant_embed_model: str  # QDRANT_EMBED_MODEL
def _env(name: str, default: Optional[str] = None) -> Optional[str]:
    """Look up *name* in the process environment.

    Surrounding whitespace is trimmed from the value. *default* is returned
    when the variable is unset or is empty once trimmed.
    """
    # An unset variable and a blank one are handled the same way, so a
    # missing key can be read as the empty string.
    cleaned = os.environ.get(name, "").strip()
    return cleaned if cleaned else default
# Spellings accepted (case-insensitively) by ``_env_flag``.
_TRUE_TOKENS = frozenset({"1", "true", "yes", "on"})
_FALSE_TOKENS = frozenset({"0", "false", "no", "off"})


def _env_flag(name: str, default: bool, *, unrecognised: bool) -> bool:
    """Read a boolean environment variable, ignoring letter case.

    Args:
        name: Environment variable to read.
        default: Result when the variable is unset or blank.
        unrecognised: Result when the variable holds a token that is neither
            a known "true" nor a known "false" spelling.

    Returns:
        The parsed flag.
    """
    raw = _env(name)
    if raw is None:
        return default
    token = raw.casefold()
    if token in _TRUE_TOKENS:
        return True
    if token in _FALSE_TOKENS:
        return False
    return unrecognised


def _env_int(name: str, default: int) -> int:
    """Read an integer environment variable.

    Args:
        name: Environment variable to read.
        default: Result when the variable is unset or blank.

    Returns:
        The parsed integer.

    Raises:
        ValueError: If the variable is set to something ``int()`` rejects.
            The message names the variable so the bad setting can be found.
    """
    raw = _env(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError as exc:
        raise ValueError(f"{name} must be an integer, got {raw!r}") from exc


def load_config() -> AppConfig:
    """Collect configuration from environment variables.

    Returns:
        An ``AppConfig`` populated from the environment, using the built-in
        defaults for anything that is unset or blank.

    Raises:
        ValueError: If RATE_LIMIT_REQUESTS, RATE_LIMIT_WINDOW_SECONDS or
            QDRANT_VECTOR_SIZE is set to a non-integer value.
    """
    ca_cert_raw = _env("ELASTIC_CA_CERT")
    elastic = ElasticSettings(
        url=_env("ELASTIC_URL", "http://localhost:9200"),
        username=_env("ELASTIC_USERNAME"),
        password=_env("ELASTIC_PASSWORD"),
        index=_env("ELASTIC_INDEX", "this_little_corner_py"),
        ca_cert=Path(ca_cert_raw).expanduser() if ca_cert_raw else None,
        # Stays on unless explicitly switched off; an unknown token keeps it on.
        verify_certs=_env_flag("ELASTIC_VERIFY_CERTS", True, unrecognised=True),
        api_key=_env("ELASTIC_API_KEY"),
        debug=_env_flag("ELASTIC_DEBUG", False, unrecognised=False),
    )

    # Default data folder: <parent of this module's directory>/data/video_metadata.
    data_override = _env("LOCAL_DATA_DIR")
    if data_override:
        data_root = Path(data_override)
    else:
        data_root = Path(__file__).resolve().parents[1] / "data" / "video_metadata"
    data = DataSettings(root=data_root)

    youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))

    rate_limit = RateLimitSettings(
        # On when unset; an unknown token still counts as "off", as before.
        enabled=_env_flag("RATE_LIMIT_ENABLED", True, unrecognised=False),
        # Clamp so a negative request count or a zero/negative window
        # can never reach the limiter.
        requests=max(_env_int("RATE_LIMIT_REQUESTS", 60), 0),
        window_seconds=max(_env_int("RATE_LIMIT_WINDOW_SECONDS", 60), 1),
    )

    return AppConfig(
        elastic=elastic,
        data=data,
        youtube=youtube,
        rate_limit=rate_limit,
        qdrant_url=_env("QDRANT_URL", "http://localhost:6333"),
        qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"),
        qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
        qdrant_vector_size=_env_int("QDRANT_VECTOR_SIZE", 1024),
        qdrant_embed_model=_env("QDRANT_EMBED_MODEL", "BAAI/bge-large-en-v1.5"),
    )
# Shared configuration object, built once when this module is imported.
CONFIG = load_config()