From 2efe5e0c799d2a177a520506a22b7fcb037ffe47 Mon Sep 17 00:00:00 2001
From: knight <gitea@ghost.tel>
Date: Wed, 5 Nov 2025 00:54:50 -0500
Subject: [PATCH] Fix secondary transcript timestamps by indexing parts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, secondary transcript parts were not being indexed
into Elasticsearch, causing the frontend to receive empty arrays
and display zero timestamps.

Changes:
- Add transcript_secondary_parts to Elasticsearch mapping
- Include secondary parts in bulk indexing actions
- Build transcript_secondary_full text from transcript_secondary_parts if not provided
- Match primary transcript structure (nested with start/duration/text)

Note: Existing data needs to be re-indexed for this fix to apply
to videos that already have secondary transcripts.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 ingest.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/ingest.py b/ingest.py
index ec61478..a87239c 100644
--- a/ingest.py
+++ b/ingest.py
@@ -74,6 +74,16 @@ def build_bulk_actions(
                 segment.get("text", "") if isinstance(segment, dict) else str(segment)
                 for segment in parts
             ).strip()
+
+        # Handle secondary transcript parts
+        secondary_parts = doc.get("transcript_secondary_parts") or []
+        transcript_secondary_full = doc.get("transcript_secondary_full")
+        if not transcript_secondary_full and isinstance(secondary_parts, list):
+            transcript_secondary_full = " ".join(
+                segment.get("text", "") if isinstance(segment, dict) else str(segment)
+                for segment in secondary_parts
+            ).strip()
+
         yield {
             "_id": video_id,
             "_index": index or CONFIG.elastic.index,
@@ -88,8 +98,9 @@ def build_bulk_actions(
                 "url": doc.get("url"),
                 "duration": doc.get("duration"),
                 "transcript_full": transcript_full,
-                "transcript_secondary_full": doc.get("transcript_secondary_full"),
+                "transcript_secondary_full": transcript_secondary_full,
                 "transcript_parts": parts,
+                "transcript_secondary_parts": secondary_parts,
             },
         }
 
@@ -121,6 +132,14 @@ def ensure_index(client: "Elasticsearch", index: str) -> None:
                         "text": {"type": "text"},
                     },
                 },
+                "transcript_secondary_parts": {
+                    "type": "nested",
+                    "properties": {
+                        "start": {"type": "float"},
+                        "duration": {"type": "float"},
+                        "text": {"type": "text"},
+                    },
+                },
             }
         },
     )