Fix secondary transcript timestamps by indexing parts
Previously, secondary transcript parts were not indexed into Elasticsearch, so the frontend received empty arrays and displayed zero timestamps.

Changes:
- Add transcript_secondary_parts to the Elasticsearch mapping
- Include secondary parts in the bulk indexing actions
- Build the transcript_secondary_full text from the parts if it is not provided
- Match the primary transcript structure (nested, with start/duration/text)

Note: Existing data must be re-indexed for this fix to apply to videos that already have secondary transcripts.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
b096e2aeeb
commit
2efe5e0c79
21
ingest.py
21
ingest.py
@ -74,6 +74,16 @@ def build_bulk_actions(
|
||||
segment.get("text", "") if isinstance(segment, dict) else str(segment)
|
||||
for segment in parts
|
||||
).strip()
|
||||
|
||||
# Handle secondary transcript parts
|
||||
secondary_parts = doc.get("transcript_secondary_parts") or []
|
||||
transcript_secondary_full = doc.get("transcript_secondary_full")
|
||||
if not transcript_secondary_full and isinstance(secondary_parts, list):
|
||||
transcript_secondary_full = " ".join(
|
||||
segment.get("text", "") if isinstance(segment, dict) else str(segment)
|
||||
for segment in secondary_parts
|
||||
).strip()
|
||||
|
||||
yield {
|
||||
"_id": video_id,
|
||||
"_index": index or CONFIG.elastic.index,
|
||||
@ -88,8 +98,9 @@ def build_bulk_actions(
|
||||
"url": doc.get("url"),
|
||||
"duration": doc.get("duration"),
|
||||
"transcript_full": transcript_full,
|
||||
"transcript_secondary_full": doc.get("transcript_secondary_full"),
|
||||
"transcript_secondary_full": transcript_secondary_full,
|
||||
"transcript_parts": parts,
|
||||
"transcript_secondary_parts": secondary_parts,
|
||||
},
|
||||
}
|
||||
|
||||
@ -121,6 +132,14 @@ def ensure_index(client: "Elasticsearch", index: str) -> None:
|
||||
"text": {"type": "text"},
|
||||
},
|
||||
},
|
||||
"transcript_secondary_parts": {
|
||||
"type": "nested",
|
||||
"properties": {
|
||||
"start": {"type": "float"},
|
||||
"duration": {"type": "float"},
|
||||
"text": {"type": "text"},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user