From 1ac076e5f2a923ca8986b8b5409158ea1e2e820c Mon Sep 17 00:00:00 2001 From: knight Date: Thu, 8 Jan 2026 15:42:21 -0500 Subject: [PATCH] Harden search responses --- search_app.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/search_app.py b/search_app.py index 71ec970..d4bc1d2 100644 --- a/search_app.py +++ b/search_app.py @@ -1187,7 +1187,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask: year = request.args.get("year", "", type=str) or None sort = request.args.get("sort", "relevant", type=str) page = max(request.args.get("page", 0, type=int), 0) - size = max(request.args.get("size", 10, type=int), 1) + size = min(max(request.args.get("size", 10, type=int), 1), MAX_QUERY_SIZE) def parse_flag(name: str, default: bool = True) -> bool: value = request.args.get(name) @@ -1215,6 +1215,10 @@ def create_app(config: AppConfig = CONFIG) -> Flask: include_external=include_external, ) start = page * size + if start >= MAX_OFFSET: + return jsonify({"error": "offset_too_large", "maxOffset": MAX_OFFSET}), 400 + if start + size > MAX_OFFSET: + size = max(1, MAX_OFFSET - start) if config.elastic.debug: LOGGER.info( "Elasticsearch search request: %s", @@ -1261,14 +1265,10 @@ def create_app(config: AppConfig = CONFIG) -> Flask: for value in (highlight_map.get("transcript_secondary_full", []) or []) ] - title_html = ( - highlight_map.get("title") - or [source.get("title") or "Untitled"] - )[0] - description_html = ( - highlight_map.get("description") - or [source.get("description") or ""] - )[0] + title_highlight = highlight_map.get("title") or [] + description_highlight = highlight_map.get("description") or [] + title_html = title_highlight[0] if title_highlight else None + description_html = description_highlight[0] if description_highlight else None documents.append( { "video_id": source.get("video_id"),