Add API rate limits
This commit is contained in:
parent
6a3d1ee491
commit
1c95f47766
@ -102,6 +102,9 @@ Other tunables (defaults shown in compose):
|
||||
- `ELASTIC_VERIFY_CERTS` (set to `1` for real TLS verification)
|
||||
- `QDRANT_COLLECTION` (default `tlc-captions-full`)
|
||||
- `QDRANT_VECTOR_NAME` / `QDRANT_VECTOR_SIZE` / `QDRANT_EMBED_MODEL`
|
||||
- `RATE_LIMIT_ENABLED` (default `1`)
|
||||
- `RATE_LIMIT_REQUESTS` (default `60`; `0` or a negative value disables the limit)
|
||||
- `RATE_LIMIT_WINDOW_SECONDS` (default `60`)
|
||||
|
||||
Port 8080 on the host is forwarded to the app. Mount `./data` (read-only) if you want local fallbacks for metrics (`LOCAL_DATA_DIR=/app/data/video_metadata`); otherwise the app will rely purely on the remote backends. Stop the container with `docker compose down`.
|
||||
|
||||
|
||||
17
config.py
17
config.py
@ -7,6 +7,9 @@ Environment Variables:
|
||||
ELASTIC_INDEX: Target index name (default: this_little_corner_py).
|
||||
LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
|
||||
YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
|
||||
RATE_LIMIT_ENABLED: Toggle API rate limiting; only `1`, `true` or `True` enable it (default: 1).
|
||||
RATE_LIMIT_REQUESTS: Max requests per window per client (default: 60; 0 or less disables limiting).
|
||||
RATE_LIMIT_WINDOW_SECONDS: Window size in seconds (default: 60; values below 1 are raised to 1).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@ -53,11 +56,19 @@ class YoutubeSettings:
|
||||
api_key: Optional[str]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RateLimitSettings:
    """Immutable settings for the API rate limiter."""

    # Master switch for rate limiting.
    enabled: bool
    # Maximum number of requests a client may make per window.
    requests: int
    # Length of the rate-limit window, in seconds.
    window_seconds: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AppConfig:
|
||||
elastic: ElasticSettings
|
||||
data: DataSettings
|
||||
youtube: YoutubeSettings
|
||||
rate_limit: RateLimitSettings
|
||||
qdrant_url: str
|
||||
qdrant_collection: str
|
||||
qdrant_vector_name: Optional[str]
|
||||
@ -94,10 +105,16 @@ def load_config() -> AppConfig:
|
||||
)
|
||||
data = DataSettings(root=data_root)
|
||||
youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))
|
||||
rate_limit = RateLimitSettings(
|
||||
enabled=_env("RATE_LIMIT_ENABLED", "1") in {"1", "true", "True"},
|
||||
requests=max(int(_env("RATE_LIMIT_REQUESTS", "60")), 0),
|
||||
window_seconds=max(int(_env("RATE_LIMIT_WINDOW_SECONDS", "60")), 1),
|
||||
)
|
||||
return AppConfig(
|
||||
elastic=elastic,
|
||||
data=data,
|
||||
youtube=youtube,
|
||||
rate_limit=rate_limit,
|
||||
qdrant_url=_env("QDRANT_URL", "http://localhost:6333"),
|
||||
qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"),
|
||||
qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
|
||||
|
||||
@ -18,10 +18,12 @@ import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||
|
||||
from collections import Counter, deque
|
||||
from collections import Counter, defaultdict, deque
|
||||
from datetime import datetime
|
||||
from threading import Lock
|
||||
from time import monotonic
|
||||
|
||||
from flask import Flask, jsonify, request, send_from_directory
|
||||
|
||||
@ -41,6 +43,23 @@ MAX_QUERY_SIZE = 100
|
||||
MAX_OFFSET = 10000
|
||||
DEFAULT_ELASTIC_TIMEOUT = int(os.environ.get("ELASTIC_TIMEOUT_SECONDS", "30"))
|
||||
|
||||
# Per-client sliding-window state: maps a client key to the monotonic
# timestamps of that client's recently accepted API requests (oldest first).
_RATE_LIMIT_BUCKETS: Dict[str, Deque[float]] = defaultdict(deque)
# Held while a request's bucket is looked up, pruned and appended to.
_RATE_LIMIT_LOCK = Lock()
|
||||
|
||||
|
||||
def _client_rate_key() -> str:
    """Return the key used to bucket the current request for rate limiting.

    The key is chosen in this order: the first entry of the
    ``X-Forwarded-For`` header (if non-blank), the ``X-Real-IP`` header,
    ``request.remote_addr``, and finally the literal ``"unknown"``.

    Returns:
        A non-empty string identifying the client.
    """
    # NOTE(review): X-Forwarded-For and X-Real-IP are supplied by the client
    # unless a trusted reverse proxy overwrites them. A caller that rotates
    # these headers gets a fresh bucket per value and can bypass the limit.
    # Confirm the deployment strips/sets them at the proxy.
    forwarded = request.headers.get("X-Forwarded-For", "")
    first_hop = forwarded.split(",")[0].strip()
    if first_hop:
        return first_hop
    # A missing header or a blank first entry (e.g. ", 1.2.3.4") falls
    # through here instead of yielding an empty key shared by all such clients.
    return request.headers.get("X-Real-IP") or request.remote_addr or "unknown"
|
||||
|
||||
|
||||
def _rate_limited_response(retry_after: int):
    """Build the JSON 429 reply for a client that exceeded its rate limit.

    Args:
        retry_after: Seconds the client should wait. It is sent both in the
            JSON body (``retryAfter``) and in the ``Retry-After`` header.

    Returns:
        The response object produced by ``jsonify`` with status code 429.
    """
    payload = {"error": "rate_limited", "retryAfter": retry_after}
    reply = jsonify(payload)
    reply.headers["Retry-After"] = str(retry_after)
    reply.status_code = 429
    return reply
|
||||
|
||||
def sanitize_query_string(query: str) -> str:
|
||||
"""
|
||||
Sanitize user input for Elasticsearch query_string queries.
|
||||
@ -902,6 +921,28 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
||||
)
|
||||
return response
|
||||
|
||||
@app.before_request
def enforce_rate_limit():
    """Apply a per-client sliding-window rate limit to ``/api/`` requests.

    Requests are let through untouched when rate limiting is disabled, when
    the path does not start with ``/api/``, or when the configured limit or
    window is not positive.

    Returns:
        ``None`` to let the request proceed, or the 429 response from
        ``_rate_limited_response`` when the client already has
        ``config.rate_limit.requests`` accepted requests inside the last
        ``config.rate_limit.window_seconds`` seconds.
    """
    from math import ceil  # Local import: only this hook needs it.

    settings = config.rate_limit
    if not settings.enabled:
        return None
    if not request.path.startswith("/api/"):
        return None
    limit = settings.requests
    window_seconds = settings.window_seconds
    if limit <= 0 or window_seconds <= 0:
        return None

    now = monotonic()
    key = _client_rate_key()
    with _RATE_LIMIT_LOCK:
        if key not in _RATE_LIMIT_BUCKETS:
            # Buckets of clients that stopped calling are never revisited, so
            # without a sweep the mapping grows forever. Sweep only when a new
            # key arrives and the size hits a multiple of 256, which keeps the
            # O(n) scan amortised across many insertions.
            tracked = len(_RATE_LIMIT_BUCKETS)
            if tracked and tracked % 256 == 0:
                expired = [
                    client
                    for client, stamps in _RATE_LIMIT_BUCKETS.items()
                    # The newest stamp being outside the window means the
                    # whole bucket would be pruned on its next access anyway.
                    if not stamps or now - stamps[-1] > window_seconds
                ]
                for client in expired:
                    del _RATE_LIMIT_BUCKETS[client]

        bucket = _RATE_LIMIT_BUCKETS[key]
        # Drop timestamps that have slid out of the window.
        while bucket and now - bucket[0] > window_seconds:
            bucket.popleft()
        if len(bucket) >= limit:
            # Round up: truncating would tell the client to come back just
            # before the oldest request expires, earning a second 429.
            remaining = window_seconds - (now - bucket[0])
            retry_after = max(1, ceil(remaining))
            return _rate_limited_response(retry_after)
        # Only accepted requests count against the limit.
        bucket.append(now)
    return None
|
||||
|
||||
client = _ensure_client(config)
|
||||
index = config.elastic.index
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user