From 1c95f47766d178b0b65f87394a16f7bc70c972ca Mon Sep 17 00:00:00 2001 From: knight Date: Thu, 8 Jan 2026 15:24:05 -0500 Subject: [PATCH] Add API rate limits --- README.md | 3 +++ config.py | 17 +++++++++++++++++ search_app.py | 45 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1ec3e7f..a5c0132 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,9 @@ Other tunables (defaults shown in compose): - `ELASTIC_VERIFY_CERTS` (set to `1` for real TLS verification) - `QDRANT_COLLECTION` (default `tlc-captions-full`) - `QDRANT_VECTOR_NAME` / `QDRANT_VECTOR_SIZE` / `QDRANT_EMBED_MODEL` +- `RATE_LIMIT_ENABLED` (default `1`) +- `RATE_LIMIT_REQUESTS` (default `60`) +- `RATE_LIMIT_WINDOW_SECONDS` (default `60`) Port 8080 on the host is forwarded to the app. Mount `./data` (read-only) if you want local fallbacks for metrics (`LOCAL_DATA_DIR=/app/data/video_metadata`); otherwise the app will rely purely on the remote backends. Stop the container with `docker compose down`. diff --git a/config.py b/config.py index 1faf15a..4d7a61f 100644 --- a/config.py +++ b/config.py @@ -7,6 +7,9 @@ Environment Variables: ELASTIC_INDEX: Target index name (default: this_little_corner_py). LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata). YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube. + RATE_LIMIT_ENABLED: Toggle API rate limiting (default: 1). + RATE_LIMIT_REQUESTS: Max requests per window per client (default: 60). + RATE_LIMIT_WINDOW_SECONDS: Window size in seconds (default: 60). """ from __future__ import annotations @@ -53,11 +56,19 @@ class YoutubeSettings: api_key: Optional[str] +@dataclass(frozen=True) +class RateLimitSettings: + enabled: bool + requests: int + window_seconds: int + + @dataclass(frozen=True) class AppConfig: elastic: ElasticSettings data: DataSettings youtube: YoutubeSettings + rate_limit: RateLimitSettings qdrant_url: str qdrant_collection: str qdrant_vector_name: Optional[str] @@ -94,10 +105,16 @@ def load_config() -> AppConfig: ) data = DataSettings(root=data_root) youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY")) + rate_limit = RateLimitSettings( + enabled=_env("RATE_LIMIT_ENABLED", "1") in {"1", "true", "True"}, + requests=max(int(_env("RATE_LIMIT_REQUESTS", "60")), 0), + window_seconds=max(int(_env("RATE_LIMIT_WINDOW_SECONDS", "60")), 1), + ) return AppConfig( elastic=elastic, data=data, youtube=youtube, + rate_limit=rate_limit, qdrant_url=_env("QDRANT_URL", "http://localhost:6333"), qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"), qdrant_vector_name=_env("QDRANT_VECTOR_NAME"), diff --git a/search_app.py b/search_app.py index 8c32be3..71ec970 100644 --- a/search_app.py +++ b/search_app.py @@ -18,10 +18,12 @@ import logging import os import re from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple +from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple -from collections import Counter, deque +from collections import Counter, defaultdict, deque from datetime import datetime +from threading import Lock +from time import monotonic from flask import Flask, jsonify, request, send_from_directory @@ -41,6 +43,23 @@ MAX_QUERY_SIZE = 100 MAX_OFFSET = 10000 DEFAULT_ELASTIC_TIMEOUT = int(os.environ.get("ELASTIC_TIMEOUT_SECONDS", "30")) +_RATE_LIMIT_BUCKETS: Dict[str, Deque[float]] = defaultdict(deque) +_RATE_LIMIT_LOCK = Lock() + + +def _client_rate_key() -> str: + forwarded = request.headers.get("X-Forwarded-For", "") + if forwarded: + return forwarded.split(",")[0].strip() + return request.headers.get("X-Real-IP") or request.remote_addr or "unknown" + + +def _rate_limited_response(retry_after: int): + response = jsonify({"error": "rate_limited", "retryAfter": retry_after}) + response.status_code = 429 + response.headers["Retry-After"] = str(retry_after) + return response + def sanitize_query_string(query: str) -> str: """ Sanitize user input for Elasticsearch query_string queries. @@ -902,6 +921,28 @@ def create_app(config: AppConfig = CONFIG) -> Flask: ) return response + @app.before_request + def enforce_rate_limit(): + if not config.rate_limit.enabled: + return None + if not request.path.startswith("/api/"): + return None + limit = config.rate_limit.requests + window_seconds = config.rate_limit.window_seconds + if limit <= 0 or window_seconds <= 0: + return None + now = monotonic() + key = _client_rate_key() + with _RATE_LIMIT_LOCK: + bucket = _RATE_LIMIT_BUCKETS[key] + while bucket and now - bucket[0] > window_seconds: + bucket.popleft() + if len(bucket) >= limit: + retry_after = max(1, int(window_seconds - (now - bucket[0]))) + return _rate_limited_response(retry_after) + bucket.append(now) + return None + client = _ensure_client(config) index = config.elastic.index