Add API rate limits

This commit is contained in:
knight 2026-01-08 15:24:05 -05:00
parent 6a3d1ee491
commit 1c95f47766
3 changed files with 63 additions and 2 deletions

View File

@ -102,6 +102,9 @@ Other tunables (defaults shown in compose):
- `ELASTIC_VERIFY_CERTS` (set to `1` for real TLS verification)
- `QDRANT_COLLECTION` (default `tlc-captions-full`)
- `QDRANT_VECTOR_NAME` / `QDRANT_VECTOR_SIZE` / `QDRANT_EMBED_MODEL`
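- `RATE_LIMIT_ENABLED` (default `1`; `1`, `true`, or `True` enables rate limiting, any other value disables it)
- `RATE_LIMIT_REQUESTS` (default `60`; maximum requests per client per window, `0` or less disables rate limiting)
- `RATE_LIMIT_WINDOW_SECONDS` (default `60`; values below `1` are raised to `1`)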
Port 8080 on the host is forwarded to the app. Mount `./data` (read-only) if you want local fallbacks for metrics (`LOCAL_DATA_DIR=/app/data/video_metadata`); otherwise the app will rely purely on the remote backends. Stop the container with `docker compose down`.

View File

@ -7,6 +7,9 @@ Environment Variables:
ELASTIC_INDEX: Target index name (default: this_little_corner_py).
LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
RATE_LIMIT_ENABLED: Enable API rate limiting when set to 1, true, or True (default: 1).
RATE_LIMIT_REQUESTS: Max requests per window per client; 0 or less disables limiting (default: 60).
RATE_LIMIT_WINDOW_SECONDS: Window size in seconds, clamped to at least 1 (default: 60).
"""
from __future__ import annotations
@ -53,11 +56,19 @@ class YoutubeSettings:
api_key: Optional[str]
@dataclass(frozen=True)
class RateLimitSettings:
    """Immutable bundle of the API rate-limit settings."""

    # Whether rate limiting is switched on.
    enabled: bool
    # Maximum number of requests allowed per window per client.
    requests: int
    # Length of the rate-limit window, in seconds.
    window_seconds: int
@dataclass(frozen=True)
class AppConfig:
elastic: ElasticSettings
data: DataSettings
youtube: YoutubeSettings
rate_limit: RateLimitSettings
qdrant_url: str
qdrant_collection: str
qdrant_vector_name: Optional[str]
@ -94,10 +105,16 @@ def load_config() -> AppConfig:
)
data = DataSettings(root=data_root)
youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))
rate_limit = RateLimitSettings(
enabled=_env("RATE_LIMIT_ENABLED", "1") in {"1", "true", "True"},
requests=max(int(_env("RATE_LIMIT_REQUESTS", "60")), 0),
window_seconds=max(int(_env("RATE_LIMIT_WINDOW_SECONDS", "60")), 1),
)
return AppConfig(
elastic=elastic,
data=data,
youtube=youtube,
rate_limit=rate_limit,
qdrant_url=_env("QDRANT_URL", "http://localhost:6333"),
qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"),
qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),

View File

@ -18,10 +18,12 @@ import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
from collections import Counter, deque
from collections import Counter, defaultdict, deque
from datetime import datetime
from threading import Lock
from time import monotonic
from flask import Flask, jsonify, request, send_from_directory
@ -41,6 +43,23 @@ MAX_QUERY_SIZE = 100
# Upper bound on offsets (presumably pagination offsets -- confirm at call sites).
MAX_OFFSET = 10000
# Timeout in seconds; ELASTIC_TIMEOUT_SECONDS overrides the built-in default of 30.
DEFAULT_ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT_SECONDS", "30"))

# Request timestamps per client key; a missing key starts as an empty deque.
_RATE_LIMIT_BUCKETS: Dict[str, Deque[float]] = defaultdict(deque)
# Held while the rate-limit hook reads or mutates _RATE_LIMIT_BUCKETS.
_RATE_LIMIT_LOCK = Lock()
def _client_rate_key() -> str:
    """Return the key used to bucket the current request for rate limiting.

    Preference order: the first entry of ``X-Forwarded-For``, then
    ``X-Real-IP``, then ``request.remote_addr``, and finally the literal
    ``"unknown"``.

    A blank first ``X-Forwarded-For`` entry (whitespace-only header, or a
    value starting with a comma) is ignored so that such requests are not
    all lumped together under the empty-string key.

    NOTE(review): both headers are supplied by the client unless a trusted
    reverse proxy overwrites them, so a caller can pick its own key. If the
    app can be reached directly, prefer ``request.remote_addr`` or a proxy
    fix-up middleware -- confirm against the deployment.
    """
    forwarded = request.headers.get("X-Forwarded-For", "")
    first_hop = forwarded.split(",", 1)[0].strip()
    if first_hop:
        return first_hop
    return request.headers.get("X-Real-IP") or request.remote_addr or "unknown"
def _rate_limited_response(retry_after: int):
    """Build the HTTP 429 JSON reply sent to a rate-limited client.

    Args:
        retry_after: Seconds the client should wait; echoed both in the JSON
            body (``retryAfter``) and in the ``Retry-After`` header.

    Returns:
        The response produced by ``jsonify`` with status 429 and the
        ``Retry-After`` header set.
    """
    payload = {"error": "rate_limited", "retryAfter": retry_after}
    reply = jsonify(payload)
    reply.headers["Retry-After"] = str(retry_after)
    reply.status_code = 429
    return reply
def sanitize_query_string(query: str) -> str:
"""
Sanitize user input for Elasticsearch query_string queries.
@ -902,6 +921,28 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
)
return response
@app.before_request
def enforce_rate_limit():
    """Apply a per-client sliding-window rate limit to ``/api/`` requests.

    Registered as a ``before_request`` hook. Returns ``None`` to let the
    request continue, or the 429 response from ``_rate_limited_response``
    when the client key already has ``config.rate_limit.requests``
    timestamps inside the last ``config.rate_limit.window_seconds`` seconds.

    NOTE(review): this hook never removes keys from ``_RATE_LIMIT_BUCKETS``,
    so the mapping grows with the number of distinct client keys seen.
    """
    import math  # local import: only used to round Retry-After upwards

    settings = config.rate_limit
    if not settings.enabled:
        return None
    if not request.path.startswith("/api/"):
        return None
    limit = settings.requests
    window_seconds = settings.window_seconds
    # A non-positive limit or window means "no limiting".
    if limit <= 0 or window_seconds <= 0:
        return None
    now = monotonic()
    key = _client_rate_key()
    with _RATE_LIMIT_LOCK:
        bucket = _RATE_LIMIT_BUCKETS[key]
        # Drop timestamps that have aged out. ">=" so a hit that is exactly
        # one window old no longer counts against the client.
        while bucket and now - bucket[0] >= window_seconds:
            bucket.popleft()
        if len(bucket) >= limit:
            # Time until the oldest retained hit leaves the window. Round up:
            # truncating would tell the client to retry slightly too early
            # and get rejected again.
            remaining = window_seconds - (now - bucket[0])
            retry_after = max(1, math.ceil(remaining))
            return _rate_limited_response(retry_after)
        bucket.append(now)
    return None
client = _ensure_client(config)
index = config.elastic.index