Revert "Fix secondary transcript timestamps by indexing parts"
This reverts commit 2efe5e0c799d2a177a520506a22b7fcb037ffe47.
This commit is contained in:
parent
2efe5e0c79
commit
e998eadd79
21
ingest.py
21
ingest.py
@@ -74,16 +74,6 @@ def build_bulk_actions(
|
||||
segment.get("text", "") if isinstance(segment, dict) else str(segment)
|
||||
for segment in parts
|
||||
).strip()
|
||||
|
||||
# Handle secondary transcript parts
|
||||
secondary_parts = doc.get("transcript_secondary_parts") or []
|
||||
transcript_secondary_full = doc.get("transcript_secondary_full")
|
||||
if not transcript_secondary_full and isinstance(secondary_parts, list):
|
||||
transcript_secondary_full = " ".join(
|
||||
segment.get("text", "") if isinstance(segment, dict) else str(segment)
|
||||
for segment in secondary_parts
|
||||
).strip()
|
||||
|
||||
yield {
|
||||
"_id": video_id,
|
||||
"_index": index or CONFIG.elastic.index,
|
||||
@@ -98,9 +88,8 @@ def build_bulk_actions(
|
||||
"url": doc.get("url"),
|
||||
"duration": doc.get("duration"),
|
||||
"transcript_full": transcript_full,
|
||||
"transcript_secondary_full": transcript_secondary_full,
|
||||
"transcript_secondary_full": doc.get("transcript_secondary_full"),
|
||||
"transcript_parts": parts,
|
||||
"transcript_secondary_parts": secondary_parts,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -132,14 +121,6 @@ def ensure_index(client: "Elasticsearch", index: str) -> None:
|
||||
"text": {"type": "text"},
|
||||
},
|
||||
},
|
||||
"transcript_secondary_parts": {
|
||||
"type": "nested",
|
||||
"properties": {
|
||||
"start": {"type": "float"},
|
||||
"duration": {"type": "float"},
|
||||
"text": {"type": "text"},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user