141 lines
4.6 KiB
Python
141 lines
4.6 KiB
Python
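"""
Centralised configuration helpers for the Python search toolkit.

Environment Variables:
    ELASTIC_URL: Base URL to the Elasticsearch node (default: http://localhost:9200).
    ELASTIC_USERNAME / ELASTIC_PASSWORD: Optional basic auth credentials.
    ELASTIC_INDEX: Target index name (default: this_little_corner_py).
    ELASTIC_API_KEY: Optional Elasticsearch API key.
    ELASTIC_CA_CERT: Optional path to a CA certificate file (``~`` is expanded).
    ELASTIC_VERIFY_CERTS: Set to 0 or false to turn off the verify_certs setting (default: 1).
    ELASTIC_DEBUG: Set to 1 or true to turn on the Elasticsearch debug flag (default: 0).
    LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
    CHANNELS_PATH: Path to the canonical channel list (default: ./channels.yml).
    RSS_FEED_URL: Public URL/path for the unified RSS feed (default: /rss/youtube-unified).
    RSS_FEED_UPSTREAM: Base URL to proxy feed requests (default: http://localhost:8097).
    YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
    RATE_LIMIT_ENABLED: Toggle API rate limiting (default: 1).
    RATE_LIMIT_REQUESTS: Max requests per window per client (default: 60).
    RATE_LIMIT_WINDOW_SECONDS: Window size in seconds (default: 60).
    QDRANT_URL: Base URL to the Qdrant node (default: http://localhost:6333).
    QDRANT_COLLECTION: Qdrant collection name (default: tlc_embeddings).
    QDRANT_VECTOR_NAME: Optional Qdrant vector name.
    QDRANT_VECTOR_SIZE: Vector size as an integer (default: 1024).
    QDRANT_EMBED_MODEL: Embedding model name (default: BAAI/bge-large-en-v1.5).
"""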
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Load .env file if it exists
import logging

try:
    # Guard only the optional third-party import, so an ImportError raised by
    # anything else is not mistaken for "python-dotenv is missing".
    from dotenv import load_dotenv
except ImportError:
    pass  # python-dotenv not installed
else:
    _logger = logging.getLogger(__name__)
    # The .env file is looked up next to this module, not in the working directory.
    _env_path = Path(__file__).parent / ".env"
    if _env_path.exists():
        _logger.info("Loading .env from: %s", _env_path)
        # override=True: values from .env replace variables that are already
        # present in the process environment.
        result = load_dotenv(_env_path, override=True)
        _logger.info("load_dotenv result: %s", result)
|
|
|
|
|
|
@dataclass(frozen=True)
class ElasticSettings:
    """Immutable bundle of Elasticsearch connection settings.

    ``load_config`` fills each field from the ``ELASTIC_*`` environment
    variable named in the comment beside it.
    """

    url: str  # ELASTIC_URL
    username: Optional[str]  # ELASTIC_USERNAME, None when unset
    password: Optional[str]  # ELASTIC_PASSWORD, None when unset
    index: str  # ELASTIC_INDEX
    ca_cert: Optional[Path]  # ELASTIC_CA_CERT as a path, None when unset
    verify_certs: bool  # ELASTIC_VERIFY_CERTS
    api_key: Optional[str]  # ELASTIC_API_KEY, None when unset
    debug: bool  # ELASTIC_DEBUG
|
|
|
|
|
|
@dataclass(frozen=True)
class DataSettings:
    """Immutable holder for the local data location."""

    root: Path  # LOCAL_DATA_DIR, or the default metadata folder chosen by load_config
|
|
|
|
|
|
@dataclass(frozen=True)
class YoutubeSettings:
    """Immutable holder for YouTube API access settings."""

    api_key: Optional[str]  # YOUTUBE_API_KEY, None when unset
|
|
|
|
|
|
@dataclass(frozen=True)
class RateLimitSettings:
    """Immutable rate-limiting parameters."""

    enabled: bool  # RATE_LIMIT_ENABLED
    requests: int  # RATE_LIMIT_REQUESTS
    window_seconds: int  # RATE_LIMIT_WINDOW_SECONDS
|
|
|
|
|
|
@dataclass(frozen=True)
class AppConfig:
    """Immutable top-level configuration object.

    Groups the per-service settings objects together with the Qdrant,
    channel-list and RSS values that ``load_config`` reads from the
    environment.
    """

    # Grouped settings objects.
    elastic: ElasticSettings
    data: DataSettings
    youtube: YoutubeSettings
    rate_limit: RateLimitSettings

    # Qdrant values (QDRANT_* environment variables).
    qdrant_url: str
    qdrant_collection: str
    qdrant_vector_name: Optional[str]  # None when QDRANT_VECTOR_NAME is unset
    qdrant_vector_size: int
    qdrant_embed_model: str

    # Channel list and RSS feed locations.
    channels_path: Path  # CHANNELS_PATH
    rss_feed_url: str  # RSS_FEED_URL
    rss_feed_upstream: str  # RSS_FEED_UPSTREAM
|
|
|
|
|
|
def _env(name: str, default: Optional[str] = None) -> Optional[str]:
    """Look up an environment variable, falling back to ``default``.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The variable's value with surrounding whitespace removed, or
        ``default`` when the variable is missing, empty or whitespace-only.
    """
    raw = os.environ.get(name)
    # An unset variable is treated the same as a blank one.
    cleaned = raw.strip() if raw is not None else ""
    return cleaned if cleaned else default
|
|
|
|
|
|
def _env_lower(name: str, default: str) -> str:
    """Return an environment variable (or ``default``) lower-cased for flag checks."""
    return (_env(name, default) or default).lower()


def _env_int(name: str, default: int) -> int:
    """Return an integer environment variable, naming the variable on bad input."""
    raw = _env(name, str(default))
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


def load_config() -> AppConfig:
    """Collect configuration from environment variables.

    Boolean flags are matched case-insensitively: ``ELASTIC_VERIFY_CERTS`` is
    on unless set to ``0``/``false``, while ``ELASTIC_DEBUG`` and
    ``RATE_LIMIT_ENABLED`` are on only when set to ``1``/``true``.

    Returns:
        A fully populated, immutable ``AppConfig``.

    Raises:
        ValueError: If ``RATE_LIMIT_REQUESTS``, ``RATE_LIMIT_WINDOW_SECONDS``
            or ``QDRANT_VECTOR_SIZE`` is set to a non-integer value.
    """
    # Read once so the truthiness check and the Path use the same value.
    ca_cert_raw = _env("ELASTIC_CA_CERT")
    elastic = ElasticSettings(
        url=_env("ELASTIC_URL", "http://localhost:9200"),
        username=_env("ELASTIC_USERNAME"),
        password=_env("ELASTIC_PASSWORD"),
        index=_env("ELASTIC_INDEX", "this_little_corner_py"),
        ca_cert=Path(ca_cert_raw).expanduser() if ca_cert_raw else None,
        # Stays on for any value other than an explicit "0"/"false".
        verify_certs=_env_lower("ELASTIC_VERIFY_CERTS", "1") not in {"0", "false"},
        api_key=_env("ELASTIC_API_KEY"),
        debug=_env_lower("ELASTIC_DEBUG", "0") in {"1", "true"},
    )

    # Default: <parent of this file's directory>/data/video_metadata.
    default_data_root = Path(__file__).resolve().parents[1] / "data" / "video_metadata"
    # expanduser() keeps "~" handling consistent with the other path settings.
    data_root = Path(_env("LOCAL_DATA_DIR", str(default_data_root))).expanduser()
    data = DataSettings(root=data_root)

    youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))

    rate_limit = RateLimitSettings(
        enabled=_env_lower("RATE_LIMIT_ENABLED", "1") in {"1", "true"},
        # Clamp so negative request counts and zero/negative windows cannot occur.
        requests=max(_env_int("RATE_LIMIT_REQUESTS", 60), 0),
        window_seconds=max(_env_int("RATE_LIMIT_WINDOW_SECONDS", 60), 1),
    )

    channels_path = Path(
        _env("CHANNELS_PATH", str(Path(__file__).parent / "channels.yml"))
    ).expanduser()
    rss_feed_url = _env("RSS_FEED_URL", "/rss/youtube-unified")
    rss_feed_upstream = _env("RSS_FEED_UPSTREAM", "http://localhost:8097")

    return AppConfig(
        elastic=elastic,
        data=data,
        youtube=youtube,
        rate_limit=rate_limit,
        qdrant_url=_env("QDRANT_URL", "http://localhost:6333"),
        qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"),
        qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
        qdrant_vector_size=_env_int("QDRANT_VECTOR_SIZE", 1024),
        qdrant_embed_model=_env("QDRANT_EMBED_MODEL", "BAAI/bge-large-en-v1.5"),
        channels_path=channels_path,
        rss_feed_url=rss_feed_url or "",
        rss_feed_upstream=rss_feed_upstream or "",
    )
|
|
|
|
|
|
# Module-level configuration instance, built once when this module is imported.
CONFIG = load_config()
|