Add API rate limits
This commit is contained in:
parent
6a3d1ee491
commit
1c95f47766
@ -102,6 +102,9 @@ Other tunables (defaults shown in compose):
|
|||||||
- `ELASTIC_VERIFY_CERTS` (set to `1` for real TLS verification)
|
- `ELASTIC_VERIFY_CERTS` (set to `1` for real TLS verification)
|
||||||
- `QDRANT_COLLECTION` (default `tlc-captions-full`)
|
- `QDRANT_COLLECTION` (default `tlc-captions-full`)
|
||||||
- `QDRANT_VECTOR_NAME` / `QDRANT_VECTOR_SIZE` / `QDRANT_EMBED_MODEL`
|
- `QDRANT_VECTOR_NAME` / `QDRANT_VECTOR_SIZE` / `QDRANT_EMBED_MODEL`
|
||||||
|
- `RATE_LIMIT_ENABLED` (default `1`; set to `0` to turn off API rate limiting)
|
||||||
|
- `RATE_LIMIT_REQUESTS` (default `60`; max `/api/` requests per client per window, `0` disables the limit)
|
||||||
|
- `RATE_LIMIT_WINDOW_SECONDS` (default `60`; length of the rate-limit window in seconds, minimum `1`)
|
||||||
|
|
||||||
Port 8080 on the host is forwarded to the app. Mount `./data` (read-only) if you want local fallbacks for metrics (`LOCAL_DATA_DIR=/app/data/video_metadata`); otherwise the app will rely purely on the remote backends. Stop the container with `docker compose down`.
|
Port 8080 on the host is forwarded to the app. Mount `./data` (read-only) if you want local fallbacks for metrics (`LOCAL_DATA_DIR=/app/data/video_metadata`); otherwise the app will rely purely on the remote backends. Stop the container with `docker compose down`.
|
||||||
|
|
||||||
|
|||||||
17
config.py
17
config.py
@ -7,6 +7,9 @@ Environment Variables:
|
|||||||
ELASTIC_INDEX: Target index name (default: this_little_corner_py).
|
ELASTIC_INDEX: Target index name (default: this_little_corner_py).
|
||||||
LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
|
LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
|
||||||
YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
|
YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
|
||||||
|
RATE_LIMIT_ENABLED: Toggle API rate limiting; enabled only for "1", "true" or "True" (default: 1).
|
||||||
|
RATE_LIMIT_REQUESTS: Max requests per window per client (default: 60).
|
||||||
|
RATE_LIMIT_WINDOW_SECONDS: Window size in seconds (default: 60).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@ -53,11 +56,19 @@ class YoutubeSettings:
|
|||||||
api_key: Optional[str]
|
api_key: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class RateLimitSettings:
    """Immutable rate-limiting options for the API.

    Instances are built by ``load_config`` from the ``RATE_LIMIT_*``
    environment variables.
    """

    # Whether API rate limiting is switched on at all.
    enabled: bool
    # Maximum number of requests a single client may make per window.
    requests: int
    # Length of the rate-limit window, in seconds.
    window_seconds: int
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class AppConfig:
|
class AppConfig:
|
||||||
elastic: ElasticSettings
|
elastic: ElasticSettings
|
||||||
data: DataSettings
|
data: DataSettings
|
||||||
youtube: YoutubeSettings
|
youtube: YoutubeSettings
|
||||||
|
rate_limit: RateLimitSettings
|
||||||
qdrant_url: str
|
qdrant_url: str
|
||||||
qdrant_collection: str
|
qdrant_collection: str
|
||||||
qdrant_vector_name: Optional[str]
|
qdrant_vector_name: Optional[str]
|
||||||
@ -94,10 +105,16 @@ def load_config() -> AppConfig:
|
|||||||
)
|
)
|
||||||
data = DataSettings(root=data_root)
|
data = DataSettings(root=data_root)
|
||||||
youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))
|
youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))
|
||||||
|
rate_limit = RateLimitSettings(
|
||||||
|
enabled=_env("RATE_LIMIT_ENABLED", "1") in {"1", "true", "True"},
|
||||||
|
requests=max(int(_env("RATE_LIMIT_REQUESTS", "60")), 0),
|
||||||
|
window_seconds=max(int(_env("RATE_LIMIT_WINDOW_SECONDS", "60")), 1),
|
||||||
|
)
|
||||||
return AppConfig(
|
return AppConfig(
|
||||||
elastic=elastic,
|
elastic=elastic,
|
||||||
data=data,
|
data=data,
|
||||||
youtube=youtube,
|
youtube=youtube,
|
||||||
|
rate_limit=rate_limit,
|
||||||
qdrant_url=_env("QDRANT_URL", "http://localhost:6333"),
|
qdrant_url=_env("QDRANT_URL", "http://localhost:6333"),
|
||||||
qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"),
|
qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"),
|
||||||
qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
|
qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
|
||||||
|
|||||||
@ -18,10 +18,12 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||||
|
|
||||||
from collections import Counter, deque
|
from collections import Counter, defaultdict, deque
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from threading import Lock
|
||||||
|
from time import monotonic
|
||||||
|
|
||||||
from flask import Flask, jsonify, request, send_from_directory
|
from flask import Flask, jsonify, request, send_from_directory
|
||||||
|
|
||||||
@ -41,6 +43,23 @@ MAX_QUERY_SIZE = 100
|
|||||||
MAX_OFFSET = 10000
|
MAX_OFFSET = 10000
|
||||||
DEFAULT_ELASTIC_TIMEOUT = int(os.environ.get("ELASTIC_TIMEOUT_SECONDS", "30"))
|
DEFAULT_ELASTIC_TIMEOUT = int(os.environ.get("ELASTIC_TIMEOUT_SECONDS", "30"))
|
||||||
|
|
||||||
|
_RATE_LIMIT_BUCKETS: Dict[str, Deque[float]] = defaultdict(deque)
|
||||||
|
_RATE_LIMIT_LOCK = Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _client_rate_key() -> str:
    """Return the string that identifies the current client for rate limiting.

    Resolution order:

    1. the first non-empty entry of the ``X-Forwarded-For`` header,
    2. the ``X-Real-IP`` header,
    3. ``request.remote_addr``,
    4. the literal ``"unknown"``.

    Blank entries are skipped so that a malformed header such as
    ``", 10.0.0.1"`` or a whitespace-only value never yields an empty key.

    NOTE(review): both headers are taken from the incoming request as-is.
    Unless a reverse proxy in front of the app overwrites them, a client can
    pick its own key and sidestep the limit -- confirm the deployment
    guarantees this.
    """
    forwarded = request.headers.get("X-Forwarded-For", "")
    for candidate in forwarded.split(","):
        candidate = candidate.strip()
        if candidate:
            return candidate

    real_ip = (request.headers.get("X-Real-IP") or "").strip()
    return real_ip or request.remote_addr or "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def _rate_limited_response(retry_after: int):
    """Build the HTTP 429 reply sent to a client that exceeded its quota.

    Args:
        retry_after: Seconds the client should wait before retrying. The
            value is reported both in the JSON body (``retryAfter``) and in
            the ``Retry-After`` response header.

    Returns:
        The JSON response object with status code 429.
    """
    payload = {"error": "rate_limited", "retryAfter": retry_after}
    reply = jsonify(payload)
    reply.headers["Retry-After"] = str(retry_after)
    reply.status_code = 429
    return reply
|
||||||
|
|
||||||
def sanitize_query_string(query: str) -> str:
|
def sanitize_query_string(query: str) -> str:
|
||||||
"""
|
"""
|
||||||
Sanitize user input for Elasticsearch query_string queries.
|
Sanitize user input for Elasticsearch query_string queries.
|
||||||
@ -902,6 +921,28 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
)
|
)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@app.before_request
def enforce_rate_limit():
    """Reject ``/api/`` requests from clients that exceed the configured rate.

    Each client key (see ``_client_rate_key``) owns a deque of request
    timestamps taken from ``monotonic()``. Timestamps older than the window
    are dropped; if the number that remain has reached the limit, the request
    is answered with a 429 instead of being handled.

    Rate limiting is skipped entirely when it is disabled, when the path does
    not start with ``/api/``, or when the configured limit or window is not
    positive.

    Returns:
        ``None`` to let the request proceed, or the response built by
        ``_rate_limited_response`` when the client is over its limit.
    """
    # Local import so this block does not depend on the module import list.
    from math import ceil

    settings = config.rate_limit
    if not settings.enabled or not request.path.startswith("/api/"):
        return None

    limit = settings.requests
    window_seconds = settings.window_seconds
    if limit <= 0 or window_seconds <= 0:
        return None

    now = monotonic()
    key = _client_rate_key()
    retry_after = None
    with _RATE_LIMIT_LOCK:
        # Buckets are keyed by a client-supplied value and were never removed,
        # so the mapping could grow without bound. At most once per window,
        # drop every bucket whose newest timestamp has already expired.
        next_sweep = getattr(enforce_rate_limit, "_next_sweep", float("-inf"))
        if now >= next_sweep:
            stale_keys = [
                client
                for client, stamps in _RATE_LIMIT_BUCKETS.items()
                if not stamps or now - stamps[-1] > window_seconds
            ]
            for client in stale_keys:
                del _RATE_LIMIT_BUCKETS[client]
            enforce_rate_limit._next_sweep = now + window_seconds

        bucket = _RATE_LIMIT_BUCKETS[key]
        while bucket and now - bucket[0] > window_seconds:
            bucket.popleft()

        if len(bucket) >= limit:
            # Round up: truncating would tell the client to come back while
            # the oldest timestamp is still inside the window.
            remaining = window_seconds - (now - bucket[0])
            retry_after = max(1, ceil(remaining))
        else:
            bucket.append(now)

    # Build the response outside the lock to keep the critical section short.
    if retry_after is not None:
        return _rate_limited_response(retry_after)
    return None
|
||||||
|
|
||||||
client = _ensure_client(config)
|
client = _ensure_client(config)
|
||||||
index = config.elastic.index
|
index = config.elastic.index
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user