Add unified channel feed
This commit is contained in:
parent
63fe922860
commit
30503628b5
@ -9,3 +9,5 @@ node_modules
|
|||||||
data
|
data
|
||||||
videos
|
videos
|
||||||
*.log
|
*.log
|
||||||
|
feed-master-config/var
|
||||||
|
feed-master-config/images
|
||||||
|
|||||||
4
.gitignore
vendored
4
.gitignore
vendored
@ -51,6 +51,10 @@ Thumbs.db
|
|||||||
# Logs
|
# Logs
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
|
# Feed Master runtime cache
|
||||||
|
feed-master-config/var/
|
||||||
|
feed-master-config/images/
|
||||||
|
|
||||||
# Testing
|
# Testing
|
||||||
.pytest_cache/
|
.pytest_cache/
|
||||||
.coverage
|
.coverage
|
||||||
|
|||||||
87
Makefile
Normal file
87
Makefile
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# Makefile for TLC Search + Feed Master
|
||||||
|
|
||||||
|
# Every target below is a command, not a file, so all of them are declared phony.
.PHONY: help config up down restart logs logs-feed logs-bridge logs-app status restart-feed update-channels
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "TLC Search + Feed Master - Management Commands"
|
||||||
|
@echo ""
|
||||||
|
@echo "Configuration:"
|
||||||
|
@echo " make config - Regenerate feed-master configuration from channels.yml"
|
||||||
|
@echo ""
|
||||||
|
@echo "Service Management:"
|
||||||
|
@echo " make up - Start all services"
|
||||||
|
@echo " make down - Stop all services"
|
||||||
|
@echo " make restart - Restart all services"
|
||||||
|
@echo " make logs - View all service logs"
|
||||||
|
@echo " make status - Check service status"
|
||||||
|
@echo ""
|
||||||
|
@echo "Updates:"
|
||||||
|
@echo " make update-channels - Regenerate config and restart feed-master"
|
||||||
|
@echo ""
|
||||||
|
@echo "Individual Services:"
|
||||||
|
@echo " make logs-feed - View feed-master logs"
|
||||||
|
@echo " make logs-bridge - View rss-bridge logs"
|
||||||
|
@echo " make logs-app - View TLC Search logs"
|
||||||
|
@echo " make restart-feed - Restart feed-master only"
|
||||||
|
|
||||||
|
# Generate feed-master configuration from channels.yml
|
||||||
|
config:
|
||||||
|
@echo "Generating feed-master configuration..."
|
||||||
|
python3 -m python_app.generate_feed_config_simple
|
||||||
|
@echo "Configuration updated!"
|
||||||
|
|
||||||
|
# Start all services
|
||||||
|
up:
|
||||||
|
docker compose up -d
|
||||||
|
@echo ""
|
||||||
|
@echo "Services started!"
|
||||||
|
@echo " - RSS Bridge: http://localhost:3001"
|
||||||
|
@echo " - Feed Master: http://localhost:8097/rss/youtube-unified"
|
||||||
|
@echo " - TLC Search: http://localhost:8080"
|
||||||
|
|
||||||
|
# Stop all services
|
||||||
|
down:
|
||||||
|
docker compose down
|
||||||
|
|
||||||
|
# Restart all services
|
||||||
|
restart:
|
||||||
|
docker compose restart
|
||||||
|
|
||||||
|
# View all logs
|
||||||
|
logs:
|
||||||
|
docker compose logs -f
|
||||||
|
|
||||||
|
# View feed-master logs
|
||||||
|
logs-feed:
|
||||||
|
docker compose logs -f feed-master
|
||||||
|
|
||||||
|
# View rss-bridge logs
|
||||||
|
logs-bridge:
|
||||||
|
docker compose logs -f rss-bridge
|
||||||
|
|
||||||
|
# View TLC Search logs
|
||||||
|
logs-app:
|
||||||
|
docker compose logs -f app
|
||||||
|
|
||||||
|
# Check service status
|
||||||
|
status:
|
||||||
|
@docker compose ps
|
||||||
|
@echo ""
|
||||||
|
@echo "Endpoints:"
|
||||||
|
@echo " - RSS Bridge: http://localhost:3001"
|
||||||
|
@echo " - Feed Master: http://localhost:8097/rss/youtube-unified"
|
||||||
|
@echo " - TLC Search: http://localhost:8080"
|
||||||
|
|
||||||
|
# Restart only feed-master
|
||||||
|
restart-feed:
|
||||||
|
docker compose restart feed-master
|
||||||
|
|
||||||
|
# Regenerate feed-master configuration from channels.yml and restart feed-master
|
||||||
|
update-channels:
|
||||||
|
@echo "Regenerating feed-master configuration..."
|
||||||
|
python3 -m python_app.generate_feed_config_simple
|
||||||
|
@echo ""
|
||||||
|
@echo "Restarting feed-master..."
|
||||||
|
docker compose restart feed-master
|
||||||
|
@echo ""
|
||||||
|
@echo "Update complete!"
|
||||||
162
channel_config.py
Normal file
162
channel_config.py
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
# Captures the path segment after "youtube.com/channel/" (scheme and "www." are
# optional); the capture stops at the next "/", "?" or "#".
_CHANNEL_ID_PATTERN = re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/channel/([^/?#]+)")
|
||||||
|
# Captures the handle after "youtube.com/@" (without the "@"); the capture stops
# at the next "/", "?" or "#".
_HANDLE_PATTERN = re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/@([^/?#]+)")
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_quotes(value: str) -> str:
|
||||||
|
if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
|
||||||
|
return value[1:-1]
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_yaml_channels(text: str) -> List[Dict[str, str]]:
    """Parse a flat ``channels:`` list into a list of string dicts.

    This is a minimal line-based reader, not a general YAML parser:
    indentation is ignored, blank lines, ``#`` comment lines and the
    ``channels:`` header are skipped, a line starting with ``- `` opens a new
    item, and every ``key: value`` line is added to the current item. Values
    have one pair of matching surrounding quotes removed.
    """
    parsed: List[Dict[str, str]] = []
    item: Dict[str, str] = {}

    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#") or stripped == "channels:":
            continue

        if stripped.startswith("- "):
            # A dash opens a new list item; bank the previous one first.
            if item:
                parsed.append(item)
            item = {}
            stripped = stripped[2:].strip()

        key, separator, value = stripped.partition(":")
        if not separator:
            # Not a key/value line (or an empty remainder after the dash).
            continue
        item[key.strip()] = _strip_quotes(value.strip())

    # The final item has no following dash to trigger its flush.
    if item:
        parsed.append(item)
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_from_url(url: str) -> Dict[str, Optional[str]]:
    """Pull the channel id and/or handle out of a YouTube URL.

    Returns a dict with the keys ``"id"`` and ``"handle"``; each value is the
    text captured by the corresponding module-level pattern, or ``None`` when
    that pattern does not occur in *url*.
    """
    id_hit = _CHANNEL_ID_PATTERN.search(url)
    handle_hit = _HANDLE_PATTERN.search(url)
    return {
        "id": id_hit.group(1) if id_hit else None,
        "handle": handle_hit.group(1) if handle_hit else None,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_handle(handle: Optional[str]) -> Optional[str]:
|
||||||
|
if not handle:
|
||||||
|
return None
|
||||||
|
return handle.lstrip("@").strip() or None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_bool(value: Optional[object]) -> Optional[bool]:
|
||||||
|
if isinstance(value, bool):
|
||||||
|
return value
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
text = str(value).strip().lower()
|
||||||
|
if text in {"1", "true", "yes", "y"}:
|
||||||
|
return True
|
||||||
|
if text in {"0", "false", "no", "n"}:
|
||||||
|
return False
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_entry(entry: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Turn one raw channel record into the canonical entry shape.

    Accepted aliases: ``id``/``channel_id``, ``handle``/``username`` and
    ``rss_enabled``/``rss``/``include_in_feed``. Missing pieces are derived
    where possible: id and handle from ``url``, ``url`` from id or handle,
    and ``name`` from handle or id.

    Returns:
        A dict with the keys ``id``, ``handle``, ``name``, ``url`` and
        ``rss_enabled``, or ``None`` when no name or no URL can be determined.
    """
    channel_id = entry.get("id") or entry.get("channel_id")
    handle = _normalize_handle(entry.get("handle") or entry.get("username"))
    url = entry.get("url")
    name = entry.get("name")

    # Take the first alias that carries a value. An explicit boolean False
    # counts as a value; chaining the lookups with `or` would discard it and
    # the channel would silently fall back to the enabled default.
    rss_raw = None
    for key in ("rss_enabled", "rss", "include_in_feed"):
        candidate = entry.get(key)
        if isinstance(candidate, bool) or candidate:
            rss_raw = candidate
            break
    rss_flag = _parse_bool(rss_raw)

    if url:
        # Explicit id/handle fields take precedence over what the URL contains.
        extracted = _extract_from_url(url)
        channel_id = channel_id or extracted.get("id")
        handle = handle or extracted.get("handle")
    elif channel_id:
        url = f"https://www.youtube.com/channel/{channel_id}"
    elif handle:
        url = f"https://www.youtube.com/@{handle}"

    if not name:
        name = handle or channel_id

    if not name or not url:
        return None

    return {
        "id": channel_id or "",
        "handle": handle or "",
        "name": name,
        "url": url,
        # Channels are included in the feed unless explicitly disabled.
        "rss_enabled": True if rss_flag is None else rss_flag,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def load_channel_entries(path: Path) -> List[Dict[str, Any]]:
    """Load, normalize and sort the channel list stored at *path*.

    Files with a ``.json`` suffix are parsed as JSON: either a list of
    records or an object with a ``channels`` list. Any other file is read
    with the simple line-based channel parser. Records that are not dicts,
    or that cannot be normalized, are dropped.

    Args:
        path: Location of the channel list file.

    Returns:
        Normalized entries sorted case-insensitively by ``name``.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(path)

    text = path.read_text(encoding="utf-8")
    if path.suffix.lower() == ".json":
        payload = json.loads(text)
        raw_entries = payload.get("channels") if isinstance(payload, dict) else payload
        # A null or scalar payload is treated as "no channels" rather than
        # failing with a TypeError when iterated.
        if not isinstance(raw_entries, list):
            raw_entries = []
    else:
        raw_entries = _parse_yaml_channels(text)

    entries: List[Dict[str, Any]] = []
    for raw in raw_entries:
        if not isinstance(raw, dict):
            continue
        # Booleans are kept as-is so flag values survive; every other value
        # is reduced to a trimmed string, and nulls are dropped.
        raw_payload: Dict[str, Any] = {
            str(key).strip(): value if isinstance(value, bool) else str(value).strip()
            for key, value in raw.items()
            if value is not None
        }
        normalized = _normalize_entry(raw_payload)
        if normalized:
            entries.append(normalized)

    entries.sort(key=lambda item: item["name"].lower())
    return entries
|
||||||
|
|
||||||
|
|
||||||
|
def build_rss_bridge_url(entry: Dict[str, Any], rss_bridge_host: str = "rss-bridge") -> Optional[str]:
    """Build the RSS-Bridge YoutubeBridge URL for a channel entry.

    The channel id is preferred; the handle is used only when no id is set.
    Both are percent-encoded before being placed in the query string, so
    reserved characters such as ``&`` or ``=`` cannot alter the query.

    Args:
        entry: Channel entry; the ``id`` and ``handle`` keys are read.
        rss_bridge_host: Host (optionally with port) of the RSS-Bridge service.

    Returns:
        The feed URL, or ``None`` when the entry has neither id nor handle.
    """
    from urllib.parse import quote

    base = f"http://{rss_bridge_host}/?action=display&bridge=YoutubeBridge"

    channel_id = entry.get("id") or ""
    if channel_id:
        return f"{base}&context=By+channel+id&c={quote(str(channel_id), safe='')}&format=Mrss"

    handle = _normalize_handle(entry.get("handle"))
    if handle:
        return f"{base}&context=By+username&u={quote(handle, safe='')}&format=Mrss"

    return None
|
||||||
258
channels.yml
Normal file
258
channels.yml
Normal file
@ -0,0 +1,258 @@
|
|||||||
|
# Shared YouTube Channel Configuration
|
||||||
|
# Used by both TLC Search (transcript collection) and Feed Master (RSS aggregation)
|
||||||
|
|
||||||
|
channels:
|
||||||
|
- id: UCCebR16tXbv5Ykk9_WtCCug
|
||||||
|
name: Channel UCCebR16tXbv
|
||||||
|
url: https://www.youtube.com/channel/UCCebR16tXbv5Ykk9_WtCCug/videos
|
||||||
|
- id: UC6vg0HkKKlgsWk-3HfV-vnw
|
||||||
|
name: A Quality Existence
|
||||||
|
url: https://www.youtube.com/channel/UC6vg0HkKKlgsWk-3HfV-vnw/videos
|
||||||
|
- id: UCeWWxwzgLYUbfjWowXhVdYw
|
||||||
|
name: Andrea with the Bangs
|
||||||
|
url: https://www.youtube.com/channel/UCeWWxwzgLYUbfjWowXhVdYw/videos
|
||||||
|
- id: UC952hDf_C4nYJdqwK7VzTxA
|
||||||
|
name: Charlie's Little Corner
|
||||||
|
url: https://www.youtube.com/channel/UC952hDf_C4nYJdqwK7VzTxA/videos
|
||||||
|
- id: UCU5SNBfTo4umhjYz6M0Jsmg
|
||||||
|
name: Christian Baxter
|
||||||
|
url: https://www.youtube.com/channel/UCU5SNBfTo4umhjYz6M0Jsmg/videos
|
||||||
|
- id: UC6Tvr9mBXNaAxLGRA_sUSRA
|
||||||
|
name: Finding Ideas
|
||||||
|
url: https://www.youtube.com/channel/UC6Tvr9mBXNaAxLGRA_sUSRA/videos
|
||||||
|
- id: UC4Rmxg7saTfwIpvq3QEzylQ
|
||||||
|
name: Ein Sof - Infinite Reflections
|
||||||
|
url: https://www.youtube.com/channel/UC4Rmxg7saTfwIpvq3QEzylQ/videos
|
||||||
|
- id: UCTdH4nh6JTcfKUAWvmnPoIQ
|
||||||
|
name: Eric Seitz
|
||||||
|
url: https://www.youtube.com/channel/UCTdH4nh6JTcfKUAWvmnPoIQ/videos
|
||||||
|
- id: UCsi_x8c12NW9FR7LL01QXKA
|
||||||
|
name: Grail Country
|
||||||
|
url: https://www.youtube.com/channel/UCsi_x8c12NW9FR7LL01QXKA/videos
|
||||||
|
- id: UCAqTQ5yLHHH44XWwWXLkvHQ
|
||||||
|
name: Grizwald Grim
|
||||||
|
url: https://www.youtube.com/channel/UCAqTQ5yLHHH44XWwWXLkvHQ/videos
|
||||||
|
- id: UCprytROeCztMOMe8plyJRMg
|
||||||
|
name: faturechi
|
||||||
|
url: https://www.youtube.com/channel/UCprytROeCztMOMe8plyJRMg/videos
|
||||||
|
- id: UCpqDUjTsof-kTNpnyWper_Q
|
||||||
|
name: John Vervaeke
|
||||||
|
url: https://www.youtube.com/channel/UCpqDUjTsof-kTNpnyWper_Q/videos
|
||||||
|
- id: UCL_f53ZEJxp8TtlOkHwMV9Q
|
||||||
|
name: Jordan B Peterson
|
||||||
|
url: https://www.youtube.com/channel/UCL_f53ZEJxp8TtlOkHwMV9Q/videos
|
||||||
|
- id: UCez1fzMRGctojfis2lfRYug
|
||||||
|
name: Lucas Vos
|
||||||
|
url: https://www.youtube.com/channel/UCez1fzMRGctojfis2lfRYug/videos
|
||||||
|
- id: UC2leFZRD0ZlQDQxpR2Zd8oA
|
||||||
|
name: Mary Kochan
|
||||||
|
url: https://www.youtube.com/channel/UC2leFZRD0ZlQDQxpR2Zd8oA/videos
|
||||||
|
- id: UC8SErJkYnDsYGh1HxoZkl-g
|
||||||
|
name: Sartori Studios
|
||||||
|
url: https://www.youtube.com/channel/UC8SErJkYnDsYGh1HxoZkl-g/videos
|
||||||
|
- id: UCEPOn4cgvrrerg_-q_Ygw1A
|
||||||
|
name: More Christ
|
||||||
|
url: https://www.youtube.com/channel/UCEPOn4cgvrrerg_-q_Ygw1A/videos
|
||||||
|
- id: UC2yCyOMUeem-cYwliC-tLJg
|
||||||
|
name: Paul Anleitner
|
||||||
|
url: https://www.youtube.com/channel/UC2yCyOMUeem-cYwliC-tLJg/videos
|
||||||
|
- id: UCGsDIP_K6J6VSTqlq-9IPlg
|
||||||
|
name: Paul VanderKlay
|
||||||
|
url: https://www.youtube.com/channel/UCGsDIP_K6J6VSTqlq-9IPlg/videos
|
||||||
|
- id: UCEzWTLDYmL8soRdQec9Fsjw
|
||||||
|
name: Randos United
|
||||||
|
url: https://www.youtube.com/channel/UCEzWTLDYmL8soRdQec9Fsjw/videos
|
||||||
|
- id: UC1KgNsMdRoIA_njVmaDdHgA
|
||||||
|
name: Randos United 2
|
||||||
|
url: https://www.youtube.com/channel/UC1KgNsMdRoIA_njVmaDdHgA/videos
|
||||||
|
- id: UCFQ6Gptuq-sLflbJ4YY3Umw
|
||||||
|
name: Rebel Wisdom
|
||||||
|
url: https://www.youtube.com/channel/UCFQ6Gptuq-sLflbJ4YY3Umw/videos
|
||||||
|
- id: UCEY1vGNBPsC3dCatZyK3Jkw
|
||||||
|
name: Strange Theology
|
||||||
|
url: https://www.youtube.com/channel/UCEY1vGNBPsC3dCatZyK3Jkw/videos
|
||||||
|
- id: UCIAtCuzdvgNJvSYILnHtdWA
|
||||||
|
name: The Anadromist
|
||||||
|
url: https://www.youtube.com/channel/UCIAtCuzdvgNJvSYILnHtdWA/videos
|
||||||
|
- id: UClIDP7_Kzv_7tDQjTv9EhrA
|
||||||
|
name: The Chris Show
|
||||||
|
url: https://www.youtube.com/channel/UClIDP7_Kzv_7tDQjTv9EhrA/videos
|
||||||
|
- id: UC-QiBn6GsM3JZJAeAQpaGAA
|
||||||
|
name: TheCommonToad
|
||||||
|
url: https://www.youtube.com/channel/UC-QiBn6GsM3JZJAeAQpaGAA/videos
|
||||||
|
- id: UCiJmdXTb76i8eIPXdJyf8ZQ
|
||||||
|
name: Channel UCiJmdXTb76i
|
||||||
|
url: https://www.youtube.com/channel/UCiJmdXTb76i8eIPXdJyf8ZQ/videos
|
||||||
|
- id: UCM9Z05vuQhMEwsV03u6DrLA
|
||||||
|
name: Cassidy van der Kamp
|
||||||
|
url: https://www.youtube.com/channel/UCM9Z05vuQhMEwsV03u6DrLA/videos
|
||||||
|
- id: UCgp_r6WlBwDSJrP43Mz07GQ
|
||||||
|
name: The Meaning Code
|
||||||
|
url: https://www.youtube.com/channel/UCgp_r6WlBwDSJrP43Mz07GQ/videos
|
||||||
|
- id: UC5uv-BxzCrN93B_5qbOdRWw
|
||||||
|
name: TheScrollersPodcast
|
||||||
|
url: https://www.youtube.com/channel/UC5uv-BxzCrN93B_5qbOdRWw/videos
|
||||||
|
- id: UCtCTSf3UwRU14nYWr_xm-dQ
|
||||||
|
name: Jonathan Pageau
|
||||||
|
url: https://www.youtube.com/channel/UCtCTSf3UwRU14nYWr_xm-dQ/videos
|
||||||
|
- id: UC1a4VtU_SMSfdRiwMJR33YQ
|
||||||
|
name: The Young Levite
|
||||||
|
url: https://www.youtube.com/channel/UC1a4VtU_SMSfdRiwMJR33YQ/videos
|
||||||
|
- id: UCg7Ed0lecvko58ibuX1XHng
|
||||||
|
name: Transfigured
|
||||||
|
url: https://www.youtube.com/channel/UCg7Ed0lecvko58ibuX1XHng/videos
|
||||||
|
- id: UCMVG5eqpYFVEB-a9IqAOuHA
|
||||||
|
name: President Foxman
|
||||||
|
url: https://www.youtube.com/channel/UCMVG5eqpYFVEB-a9IqAOuHA/videos
|
||||||
|
- id: UC8mJqpS_EBbMcyuzZDF0TEw
|
||||||
|
name: Neal Daedalus
|
||||||
|
url: https://www.youtube.com/channel/UC8mJqpS_EBbMcyuzZDF0TEw/videos
|
||||||
|
- id: UCGHuURJ1XFHzPSeokf6510A
|
||||||
|
name: Aphrael Pilotson
|
||||||
|
url: https://www.youtube.com/channel/UCGHuURJ1XFHzPSeokf6510A/videos
|
||||||
|
- id: UC704NVL2DyzYg3rMU9r1f7A
|
||||||
|
handle: chrishoward8473
|
||||||
|
name: Chris Howard
|
||||||
|
url: https://www.youtube.com/@chrishoward8473/videos
|
||||||
|
- id: UChptV-kf8lnncGh7DA2m8Pw
|
||||||
|
name: Shoulder Serf
|
||||||
|
url: https://www.youtube.com/channel/UChptV-kf8lnncGh7DA2m8Pw/videos
|
||||||
|
- id: UCzX6R3ZLQh5Zma_5AsPcqPA
|
||||||
|
name: Restoring Meaning
|
||||||
|
url: https://www.youtube.com/channel/UCzX6R3ZLQh5Zma_5AsPcqPA/videos
|
||||||
|
- id: UCiukuaNd_qzRDTW9qe2OC1w
|
||||||
|
name: Kale Zelden
|
||||||
|
url: https://www.youtube.com/channel/UCiukuaNd_qzRDTW9qe2OC1w/videos
|
||||||
|
- id: UC5yLuFQCms4nb9K2bGQLqIw
|
||||||
|
name: Ron Copperman
|
||||||
|
url: https://www.youtube.com/channel/UC5yLuFQCms4nb9K2bGQLqIw/videos
|
||||||
|
- id: UCVdSgEf9bLXFMBGSMhn7x4Q
|
||||||
|
name: Mark D Parker
|
||||||
|
url: https://www.youtube.com/channel/UCVdSgEf9bLXFMBGSMhn7x4Q/videos
|
||||||
|
- id: UC_dnk5D4tFCRYCrKIcQlcfw
|
||||||
|
name: Luke Thompson
|
||||||
|
url: https://www.youtube.com/channel/UC_dnk5D4tFCRYCrKIcQlcfw/videos
|
||||||
|
- id: UCT8Lq3ufaGEnCSS8WpFatqw
|
||||||
|
handle: Freerilian
|
||||||
|
name: Free Rilian
|
||||||
|
url: https://www.youtube.com/@Freerilian/videos
|
||||||
|
- id: UC977g6oGYIJDQnsZOGjQBBA
|
||||||
|
handle: marks.-ry7bm
|
||||||
|
name: Mark S
|
||||||
|
url: https://www.youtube.com/@marks.-ry7bm/videos
|
||||||
|
- id: UCbD1Pm0TOcRK2zaCrwgcTTg
|
||||||
|
handle: Adams-Fall
|
||||||
|
name: Adams Fall
|
||||||
|
url: https://www.youtube.com/@Adams-Fall/videos
|
||||||
|
- id: UCnojyPW0IgLWTQ0SaDQ1KBA
|
||||||
|
handle: mcmosav
|
||||||
|
name: mcmosav
|
||||||
|
url: https://www.youtube.com/@mcmosav/videos
|
||||||
|
- id: UCiOZYvBGHw1Y6wyzffwEp9g
|
||||||
|
handle: Landbeorht
|
||||||
|
name: Joseph Lambrecht
|
||||||
|
url: https://www.youtube.com/@Landbeorht/videos
|
||||||
|
- id: UCAXyF_HFeMgwS8nkGVeroAA
|
||||||
|
handle: Corner_Citizen
|
||||||
|
name: Corner Citizen
|
||||||
|
url: https://www.youtube.com/@Corner_Citizen/videos
|
||||||
|
- id: UCv2Qft5mZrmA9XAwnl9PU-g
|
||||||
|
handle: ethan.caughey
|
||||||
|
name: Ethan Caughey
|
||||||
|
url: https://www.youtube.com/@ethan.caughey/videos
|
||||||
|
- id: UCMJCtS8jKouJ2d8UIYzW3vg
|
||||||
|
handle: MarcInTbilisi
|
||||||
|
name: Marc Jackson
|
||||||
|
url: https://www.youtube.com/@MarcInTbilisi/videos
|
||||||
|
- id: UCk9O91WwruXmgu1NQrKZZEw
|
||||||
|
handle: climbingmt.sophia
|
||||||
|
name: Climbing Mt Sophia
|
||||||
|
url: https://www.youtube.com/@climbingmt.sophia/videos
|
||||||
|
- id: UCUSyTPWW4JaG1YfUPddw47Q
|
||||||
|
handle: Skankenstein
|
||||||
|
name: Skankenstein
|
||||||
|
url: https://www.youtube.com/@Skankenstein/videos
|
||||||
|
- id: UCzw2FNI3IRphcAoVcUENOgQ
|
||||||
|
handle: UpCycleClub
|
||||||
|
name: UpCycleClub
|
||||||
|
url: https://www.youtube.com/@UpCycleClub/videos
|
||||||
|
- id: UCQ7rVoApmYIpcmU7fB9RPyw
|
||||||
|
handle: JessPurviance
|
||||||
|
name: Jesspurviance
|
||||||
|
url: https://www.youtube.com/@JessPurviance/videos
|
||||||
|
- id: UCrZyTWGMdRM9_P26RKPvh3A
|
||||||
|
handle: greyhamilton52
|
||||||
|
name: Grey Hamilton
|
||||||
|
url: https://www.youtube.com/@greyhamilton52/videos
|
||||||
|
- id: UCDCfI162vhPvwdxW6X4nmiw
|
||||||
|
handle: paulrenenichols
|
||||||
|
name: Paul Rene Nichols
|
||||||
|
url: https://www.youtube.com/@paulrenenichols/videos
|
||||||
|
- id: UCFLovlJ8RFApfjrf2y157xg
|
||||||
|
handle: OfficialSecularKoranism
|
||||||
|
name: Secular Koranism
|
||||||
|
url: https://www.youtube.com/@OfficialSecularKoranism/videos
|
||||||
|
- id: UC_-YQbnPfBbIezMr1adZZiQ
|
||||||
|
handle: FromWhomAllBlessingsFlow
|
||||||
|
name: From Whom All Blessings Flow
|
||||||
|
url: https://www.youtube.com/@FromWhomAllBlessingsFlow/videos
|
||||||
|
- id: UCn5mf-fcpBmkepIpZ8eFRng
|
||||||
|
handle: FoodTruckEmily
|
||||||
|
name: Emily Rajeh
|
||||||
|
url: https://www.youtube.com/@FoodTruckEmily/videos
|
||||||
|
- id: UC6zHDj4D323xJkblnPTvY3Q
|
||||||
|
handle: O.G.Rose.Michelle.and.Daniel
|
||||||
|
name: OG Rose
|
||||||
|
url: https://www.youtube.com/@O.G.Rose.Michelle.and.Daniel/videos
|
||||||
|
- id: UC4GiA5Hnwy415uVRymxPK-w
|
||||||
|
handle: JonathanDumeer
|
||||||
|
name: Jonathan Dumeer
|
||||||
|
url: https://www.youtube.com/@JonathanDumeer/videos
|
||||||
|
- id: UCMzT-mdCqoyEv_-YZVtE7MQ
|
||||||
|
handle: JordanGreenhall
|
||||||
|
name: Jordan Hall
|
||||||
|
url: https://www.youtube.com/@JordanGreenhall/videos
|
||||||
|
- id: UC5goUoFM4LPim4eY4pwRXYw
|
||||||
|
handle: NechamaGluck
|
||||||
|
name: Nechama Gluck
|
||||||
|
url: https://www.youtube.com/@NechamaGluck/videos
|
||||||
|
- id: UCPUVeoQYyq8cndWwyczX6RA
|
||||||
|
handle: justinsmorningcoffee
|
||||||
|
name: Justinsmorningcoffee
|
||||||
|
url: https://www.youtube.com/@justinsmorningcoffee/videos
|
||||||
|
- id: UCB0C8DEIQlQzvSGuGriBxtA
|
||||||
|
handle: grahampardun
|
||||||
|
name: Grahampardun
|
||||||
|
url: https://www.youtube.com/@grahampardun/videos
|
||||||
|
- id: UCpLJJLVB_7v4Igq-9arja1A
|
||||||
|
handle: michaelmartin8681
|
||||||
|
name: Michaelmartin8681
|
||||||
|
url: https://www.youtube.com/@michaelmartin8681/videos
|
||||||
|
- id: UCxV18lwwh29DiWuooz7UCvg
|
||||||
|
handle: davidbusuttil9086
|
||||||
|
name: Davidbusuttil9086
|
||||||
|
url: https://www.youtube.com/@davidbusuttil9086/videos
|
||||||
|
- id: UCosBhpwwGh_ueYq4ZSi5dGw
|
||||||
|
handle: matthewparlato5626
|
||||||
|
name: Matthewparlato5626
|
||||||
|
url: https://www.youtube.com/@matthewparlato5626/videos
|
||||||
|
- id: UCwF5LWNOFou_50bT65bq4Bg
|
||||||
|
handle: lancecleaver227
|
||||||
|
name: Lancecleaver227
|
||||||
|
url: https://www.youtube.com/@lancecleaver227/videos
|
||||||
|
- id: UCaJ0CqiiMSTq4X0rycUOIjw
|
||||||
|
handle: theplebistocrat
|
||||||
|
name: the plebistocrat
|
||||||
|
url: https://www.youtube.com/@theplebistocrat/videos
|
||||||
|
- id: UCZA5mUAyYcCL1kYgxbeMNrA
|
||||||
|
handle: RightInChrist
|
||||||
|
name: Rightinchrist
|
||||||
|
url: https://www.youtube.com/@RightInChrist/videos
|
||||||
|
- id: UCDIPXp88qjAV3TiaR5Uo3iQ
|
||||||
|
handle: RafeKelley
|
||||||
|
name: Rafekelley
|
||||||
|
url: https://www.youtube.com/@RafeKelley/videos
|
||||||
|
- id: UCedgru6YCto3zyXjlbuQuqA
|
||||||
|
handle: WavesOfObsession
|
||||||
|
name: Wavesofobsession
|
||||||
|
url: https://www.youtube.com/@WavesOfObsession/videos
|
||||||
14
config.py
14
config.py
@ -6,6 +6,9 @@ Environment Variables:
|
|||||||
ELASTIC_USERNAME / ELASTIC_PASSWORD: Optional basic auth credentials.
|
ELASTIC_USERNAME / ELASTIC_PASSWORD: Optional basic auth credentials.
|
||||||
ELASTIC_INDEX: Target index name (default: this_little_corner_py).
|
ELASTIC_INDEX: Target index name (default: this_little_corner_py).
|
||||||
LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
|
LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
|
||||||
|
CHANNELS_PATH: Path to the canonical channel list (default: ./channels.yml).
|
||||||
|
RSS_FEED_URL: Public URL/path for the unified RSS feed (default: /rss/youtube-unified).
|
||||||
|
RSS_FEED_UPSTREAM: Base URL to proxy feed requests (default: http://localhost:8097).
|
||||||
YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
|
YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
|
||||||
RATE_LIMIT_ENABLED: Toggle API rate limiting (default: 1).
|
RATE_LIMIT_ENABLED: Toggle API rate limiting (default: 1).
|
||||||
RATE_LIMIT_REQUESTS: Max requests per window per client (default: 60).
|
RATE_LIMIT_REQUESTS: Max requests per window per client (default: 60).
|
||||||
@ -74,6 +77,9 @@ class AppConfig:
|
|||||||
qdrant_vector_name: Optional[str]
|
qdrant_vector_name: Optional[str]
|
||||||
qdrant_vector_size: int
|
qdrant_vector_size: int
|
||||||
qdrant_embed_model: str
|
qdrant_embed_model: str
|
||||||
|
channels_path: Path
|
||||||
|
rss_feed_url: str
|
||||||
|
rss_feed_upstream: str
|
||||||
|
|
||||||
|
|
||||||
def _env(name: str, default: Optional[str] = None) -> Optional[str]:
|
def _env(name: str, default: Optional[str] = None) -> Optional[str]:
|
||||||
@ -110,6 +116,11 @@ def load_config() -> AppConfig:
|
|||||||
requests=max(int(_env("RATE_LIMIT_REQUESTS", "60")), 0),
|
requests=max(int(_env("RATE_LIMIT_REQUESTS", "60")), 0),
|
||||||
window_seconds=max(int(_env("RATE_LIMIT_WINDOW_SECONDS", "60")), 1),
|
window_seconds=max(int(_env("RATE_LIMIT_WINDOW_SECONDS", "60")), 1),
|
||||||
)
|
)
|
||||||
|
channels_path = Path(
|
||||||
|
_env("CHANNELS_PATH", str(Path(__file__).parent / "channels.yml"))
|
||||||
|
).expanduser()
|
||||||
|
rss_feed_url = _env("RSS_FEED_URL", "/rss/youtube-unified")
|
||||||
|
rss_feed_upstream = _env("RSS_FEED_UPSTREAM", "http://localhost:8097")
|
||||||
return AppConfig(
|
return AppConfig(
|
||||||
elastic=elastic,
|
elastic=elastic,
|
||||||
data=data,
|
data=data,
|
||||||
@ -120,6 +131,9 @@ def load_config() -> AppConfig:
|
|||||||
qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
|
qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
|
||||||
qdrant_vector_size=int(_env("QDRANT_VECTOR_SIZE", "1024")),
|
qdrant_vector_size=int(_env("QDRANT_VECTOR_SIZE", "1024")),
|
||||||
qdrant_embed_model=_env("QDRANT_EMBED_MODEL", "BAAI/bge-large-en-v1.5"),
|
qdrant_embed_model=_env("QDRANT_EMBED_MODEL", "BAAI/bge-large-en-v1.5"),
|
||||||
|
channels_path=channels_path,
|
||||||
|
rss_feed_url=rss_feed_url or "",
|
||||||
|
rss_feed_upstream=rss_feed_upstream or "",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,8 +1,47 @@
|
|||||||
version: "3.9"
|
version: "3.9"
|
||||||
|
|
||||||
# Runs only the Flask app container, pointing to remote Elasticsearch/Qdrant.
|
# TLC Search + Feed Master - Complete YouTube content indexing & RSS aggregation
|
||||||
# Provide ELASTIC_URL / QDRANT_URL (and related) via environment or a .env file.
|
# Provide ELASTIC_URL / QDRANT_URL (and related) via environment or a .env file.
|
||||||
services:
|
services:
|
||||||
|
# RSS Bridge - Converts YouTube channels to RSS feeds
|
||||||
|
rss-bridge:
|
||||||
|
image: rssbridge/rss-bridge:latest
|
||||||
|
container_name: tlc-rss-bridge
|
||||||
|
hostname: rss-bridge
|
||||||
|
restart: unless-stopped
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "5"
|
||||||
|
ports:
|
||||||
|
- "3001:80"
|
||||||
|
|
||||||
|
# Feed Master - Aggregates multiple RSS feeds into unified feed
|
||||||
|
feed-master:
|
||||||
|
image: umputun/feed-master:latest
|
||||||
|
container_name: tlc-feed-master
|
||||||
|
hostname: feed-master
|
||||||
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- rss-bridge
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "5"
|
||||||
|
environment:
|
||||||
|
- DEBUG=false
|
||||||
|
- FM_DB=/srv/var/feed-master.bdb
|
||||||
|
- FM_CONF=/srv/etc/fm.yml
|
||||||
|
volumes:
|
||||||
|
- ./feed-master-config:/srv/etc
|
||||||
|
- ./feed-master-config/var:/srv/var
|
||||||
|
- ./feed-master-config/images:/srv/images
|
||||||
|
ports:
|
||||||
|
- "8097:8080"
|
||||||
|
|
||||||
|
# TLC Search - Flask app for searching YouTube transcripts
|
||||||
app:
|
app:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
@ -16,6 +55,9 @@ services:
|
|||||||
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
|
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
|
||||||
ELASTIC_API_KEY: ${ELASTIC_API_KEY:-}
|
ELASTIC_API_KEY: ${ELASTIC_API_KEY:-}
|
||||||
ELASTIC_VERIFY_CERTS: ${ELASTIC_VERIFY_CERTS:-0}
|
ELASTIC_VERIFY_CERTS: ${ELASTIC_VERIFY_CERTS:-0}
|
||||||
|
CHANNELS_PATH: ${CHANNELS_PATH:-/app/python_app/channels.yml}
|
||||||
|
RSS_FEED_URL: ${RSS_FEED_URL:-/rss/youtube-unified}
|
||||||
|
RSS_FEED_UPSTREAM: ${RSS_FEED_UPSTREAM:-http://feed-master:8080}
|
||||||
QDRANT_URL: ${QDRANT_URL:?set QDRANT_URL to your remote Qdrant URL}
|
QDRANT_URL: ${QDRANT_URL:?set QDRANT_URL to your remote Qdrant URL}
|
||||||
QDRANT_COLLECTION: ${QDRANT_COLLECTION:-tlc-captions-full}
|
QDRANT_COLLECTION: ${QDRANT_COLLECTION:-tlc-captions-full}
|
||||||
QDRANT_VECTOR_NAME: ${QDRANT_VECTOR_NAME:-}
|
QDRANT_VECTOR_NAME: ${QDRANT_VECTOR_NAME:-}
|
||||||
@ -23,4 +65,5 @@ services:
|
|||||||
QDRANT_EMBED_MODEL: ${QDRANT_EMBED_MODEL:-BAAI/bge-large-en-v1.5}
|
QDRANT_EMBED_MODEL: ${QDRANT_EMBED_MODEL:-BAAI/bge-large-en-v1.5}
|
||||||
LOCAL_DATA_DIR: ${LOCAL_DATA_DIR:-/app/data/video_metadata}
|
LOCAL_DATA_DIR: ${LOCAL_DATA_DIR:-/app/data/video_metadata}
|
||||||
volumes:
|
volumes:
|
||||||
|
- ./channels.yml:/app/python_app/channels.yml:ro
|
||||||
- ./data:/app/data:ro
|
- ./data:/app/data:ro
|
||||||
|
|||||||
166
feed-master-config/fm.yml
Normal file
166
feed-master-config/fm.yml
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
# Feed Master Configuration
|
||||||
|
# Auto-generated from channels.yml
|
||||||
|
# Do not edit manually - regenerate using generate_feed_config_simple.py
|
||||||
|
|
||||||
|
feeds:
|
||||||
|
youtube-unified:
|
||||||
|
title: YouTube Unified Feed
|
||||||
|
description: Aggregated feed from all YouTube channels
|
||||||
|
link: https://youtube.com
|
||||||
|
language: "en-us"
|
||||||
|
sources:
|
||||||
|
- name: A Quality Existence
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC6vg0HkKKlgsWk-3HfV-vnw&format=Mrss
|
||||||
|
- name: Adams Fall
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCbD1Pm0TOcRK2zaCrwgcTTg&format=Mrss
|
||||||
|
- name: Andrea with the Bangs
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCeWWxwzgLYUbfjWowXhVdYw&format=Mrss
|
||||||
|
- name: Aphrael Pilotson
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCGHuURJ1XFHzPSeokf6510A&format=Mrss
|
||||||
|
- name: Cassidy van der Kamp
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCM9Z05vuQhMEwsV03u6DrLA&format=Mrss
|
||||||
|
- name: Channel UCCebR16tXbv
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCCebR16tXbv5Ykk9_WtCCug&format=Mrss
|
||||||
|
- name: Channel UCiJmdXTb76i
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCiJmdXTb76i8eIPXdJyf8ZQ&format=Mrss
|
||||||
|
- name: Charlie's Little Corner
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC952hDf_C4nYJdqwK7VzTxA&format=Mrss
|
||||||
|
- name: Chris Howard
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC704NVL2DyzYg3rMU9r1f7A&format=Mrss
|
||||||
|
- name: Christian Baxter
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCU5SNBfTo4umhjYz6M0Jsmg&format=Mrss
|
||||||
|
- name: Climbing Mt Sophia
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCk9O91WwruXmgu1NQrKZZEw&format=Mrss
|
||||||
|
- name: Corner Citizen
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCAXyF_HFeMgwS8nkGVeroAA&format=Mrss
|
||||||
|
- name: Davidbusuttil9086
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCxV18lwwh29DiWuooz7UCvg&format=Mrss
|
||||||
|
- name: Ein Sof - Infinite Reflections
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC4Rmxg7saTfwIpvq3QEzylQ&format=Mrss
|
||||||
|
- name: Emily Rajeh
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCn5mf-fcpBmkepIpZ8eFRng&format=Mrss
|
||||||
|
- name: Eric Seitz
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCTdH4nh6JTcfKUAWvmnPoIQ&format=Mrss
|
||||||
|
- name: Ethan Caughey
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCv2Qft5mZrmA9XAwnl9PU-g&format=Mrss
|
||||||
|
- name: faturechi
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCprytROeCztMOMe8plyJRMg&format=Mrss
|
||||||
|
- name: Finding Ideas
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC6Tvr9mBXNaAxLGRA_sUSRA&format=Mrss
|
||||||
|
- name: Free Rilian
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCT8Lq3ufaGEnCSS8WpFatqw&format=Mrss
|
||||||
|
- name: From Whom All Blessings Flow
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC_-YQbnPfBbIezMr1adZZiQ&format=Mrss
|
||||||
|
- name: Grahampardun
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCB0C8DEIQlQzvSGuGriBxtA&format=Mrss
|
||||||
|
- name: Grail Country
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCsi_x8c12NW9FR7LL01QXKA&format=Mrss
|
||||||
|
- name: Grey Hamilton
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCrZyTWGMdRM9_P26RKPvh3A&format=Mrss
|
||||||
|
- name: Grizwald Grim
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCAqTQ5yLHHH44XWwWXLkvHQ&format=Mrss
|
||||||
|
- name: Jesspurviance
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCQ7rVoApmYIpcmU7fB9RPyw&format=Mrss
|
||||||
|
- name: John Vervaeke
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCpqDUjTsof-kTNpnyWper_Q&format=Mrss
|
||||||
|
- name: Jonathan Dumeer
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC4GiA5Hnwy415uVRymxPK-w&format=Mrss
|
||||||
|
- name: Jonathan Pageau
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCtCTSf3UwRU14nYWr_xm-dQ&format=Mrss
|
||||||
|
- name: Jordan B Peterson
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCL_f53ZEJxp8TtlOkHwMV9Q&format=Mrss
|
||||||
|
- name: Jordan Hall
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCMzT-mdCqoyEv_-YZVtE7MQ&format=Mrss
|
||||||
|
- name: Joseph Lambrecht
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCiOZYvBGHw1Y6wyzffwEp9g&format=Mrss
|
||||||
|
- name: Justinsmorningcoffee
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCPUVeoQYyq8cndWwyczX6RA&format=Mrss
|
||||||
|
- name: Kale Zelden
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCiukuaNd_qzRDTW9qe2OC1w&format=Mrss
|
||||||
|
- name: Lancecleaver227
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCwF5LWNOFou_50bT65bq4Bg&format=Mrss
|
||||||
|
- name: Lucas Vos
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCez1fzMRGctojfis2lfRYug&format=Mrss
|
||||||
|
- name: Luke Thompson
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC_dnk5D4tFCRYCrKIcQlcfw&format=Mrss
|
||||||
|
- name: Marc Jackson
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCMJCtS8jKouJ2d8UIYzW3vg&format=Mrss
|
||||||
|
- name: Mark D Parker
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCVdSgEf9bLXFMBGSMhn7x4Q&format=Mrss
|
||||||
|
- name: Mark S
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC977g6oGYIJDQnsZOGjQBBA&format=Mrss
|
||||||
|
- name: Mary Kochan
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC2leFZRD0ZlQDQxpR2Zd8oA&format=Mrss
|
||||||
|
- name: Matthewparlato5626
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCosBhpwwGh_ueYq4ZSi5dGw&format=Mrss
|
||||||
|
- name: mcmosav
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCnojyPW0IgLWTQ0SaDQ1KBA&format=Mrss
|
||||||
|
- name: Michaelmartin8681
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCpLJJLVB_7v4Igq-9arja1A&format=Mrss
|
||||||
|
- name: More Christ
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCEPOn4cgvrrerg_-q_Ygw1A&format=Mrss
|
||||||
|
- name: Neal Daedalus
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC8mJqpS_EBbMcyuzZDF0TEw&format=Mrss
|
||||||
|
- name: Nechama Gluck
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC5goUoFM4LPim4eY4pwRXYw&format=Mrss
|
||||||
|
- name: OG Rose
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC6zHDj4D323xJkblnPTvY3Q&format=Mrss
|
||||||
|
- name: Paul Anleitner
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC2yCyOMUeem-cYwliC-tLJg&format=Mrss
|
||||||
|
- name: Paul Rene Nichols
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCDCfI162vhPvwdxW6X4nmiw&format=Mrss
|
||||||
|
- name: Paul VanderKlay
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCGsDIP_K6J6VSTqlq-9IPlg&format=Mrss
|
||||||
|
- name: President Foxman
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCMVG5eqpYFVEB-a9IqAOuHA&format=Mrss
|
||||||
|
- name: Rafekelley
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCDIPXp88qjAV3TiaR5Uo3iQ&format=Mrss
|
||||||
|
- name: Randos United
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCEzWTLDYmL8soRdQec9Fsjw&format=Mrss
|
||||||
|
- name: Randos United 2
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC1KgNsMdRoIA_njVmaDdHgA&format=Mrss
|
||||||
|
- name: Rebel Wisdom
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCFQ6Gptuq-sLflbJ4YY3Umw&format=Mrss
|
||||||
|
- name: Restoring Meaning
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCzX6R3ZLQh5Zma_5AsPcqPA&format=Mrss
|
||||||
|
- name: Rightinchrist
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCZA5mUAyYcCL1kYgxbeMNrA&format=Mrss
|
||||||
|
- name: Ron Copperman
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC5yLuFQCms4nb9K2bGQLqIw&format=Mrss
|
||||||
|
- name: Sartori Studios
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC8SErJkYnDsYGh1HxoZkl-g&format=Mrss
|
||||||
|
- name: Secular Koranism
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCFLovlJ8RFApfjrf2y157xg&format=Mrss
|
||||||
|
- name: Shoulder Serf
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UChptV-kf8lnncGh7DA2m8Pw&format=Mrss
|
||||||
|
- name: Skankenstein
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCUSyTPWW4JaG1YfUPddw47Q&format=Mrss
|
||||||
|
- name: Strange Theology
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCEY1vGNBPsC3dCatZyK3Jkw&format=Mrss
|
||||||
|
- name: The Anadromist
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCIAtCuzdvgNJvSYILnHtdWA&format=Mrss
|
||||||
|
- name: The Chris Show
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UClIDP7_Kzv_7tDQjTv9EhrA&format=Mrss
|
||||||
|
- name: The Meaning Code
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCgp_r6WlBwDSJrP43Mz07GQ&format=Mrss
|
||||||
|
- name: the plebistocrat
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCaJ0CqiiMSTq4X0rycUOIjw&format=Mrss
|
||||||
|
- name: The Young Levite
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC1a4VtU_SMSfdRiwMJR33YQ&format=Mrss
|
||||||
|
- name: TheCommonToad
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC-QiBn6GsM3JZJAeAQpaGAA&format=Mrss
|
||||||
|
- name: TheScrollersPodcast
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UC5uv-BxzCrN93B_5qbOdRWw&format=Mrss
|
||||||
|
- name: Transfigured
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCg7Ed0lecvko58ibuX1XHng&format=Mrss
|
||||||
|
- name: UpCycleClub
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCzw2FNI3IRphcAoVcUENOgQ&format=Mrss
|
||||||
|
- name: Wavesofobsession
|
||||||
|
url: http://rss-bridge/?action=display&bridge=YoutubeBridge&context=By+channel+id&c=UCedgru6YCto3zyXjlbuQuqA&format=Mrss
|
||||||
|
|
||||||
|
system:
|
||||||
|
update: 5m
|
||||||
|
max_per_feed: 5
|
||||||
|
max_total: 200
|
||||||
|
max_keep: 1000
|
||||||
|
base_url: http://localhost:8097
|
||||||
91
generate_feed_config.py
Normal file
91
generate_feed_config.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Generate feed-master configuration from channels.yml.
|
||||||
|
This ensures a single source of truth for the YouTube channels.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .channel_config import build_rss_bridge_url, load_channel_entries
|
||||||
|
|
||||||
|
|
||||||
|
def generate_fm_config(channels_file, output_file, rss_bridge_host="rss-bridge"):
    """Generate the feed-master YAML configuration from channels.yml.

    Args:
        channels_file: Path of the channel list; wrapped in ``Path`` before
            being handed to ``load_channel_entries``.
        output_file: Path the generated configuration is written to.
        rss_bridge_host: Host name forwarded to ``build_rss_bridge_url`` for
            every channel.

    Channels whose ``rss_enabled`` value is falsy, and channels for which
    ``build_rss_bridge_url`` returns nothing, are counted as skipped and left
    out of the generated file.
    """
    import json  # local import: only used to quote channel names below

    print(f"Reading channels from {channels_file}")
    channels = load_channel_entries(Path(channels_file))
    print(f"Found {len(channels)} channels")

    # Static head of the document: a single aggregated feed definition.
    config = [
        "# Feed Master Configuration",
        "# Auto-generated from channels.yml",
        "# Do not edit manually - regenerate using generate_feed_config.py",
        "",
        "feeds:",
        "  youtube-unified:",
        "    title: YouTube Unified Feed",
        "    description: Aggregated feed from all YouTube channels",
        "    link: https://youtube.com",
        '    language: "en-us"',
        "    sources:",
    ]

    processed = 0
    skipped = 0
    for channel in channels:
        if not channel.get("rss_enabled", True):
            skipped += 1
            continue
        bridge_url = build_rss_bridge_url(channel, rss_bridge_host=rss_bridge_host)
        if not bridge_url:
            skipped += 1
            continue
        name = channel.get("name") or "Unknown"
        # A JSON string literal is also a valid YAML double-quoted scalar, so
        # names containing ':', '#', quotes or leading indicator characters
        # can no longer break the generated document.
        quoted_name = json.dumps(str(name), ensure_ascii=False)
        config.append(f"      - name: {quoted_name}")
        config.append(f"        url: {bridge_url}")
        processed += 1

    # Global feed-master settings.
    config.extend(
        [
            "",
            "system:",
            "  update: 5m",
            "  max_per_feed: 5",
            "  max_total: 200",
            "  max_keep: 1000",
            "  base_url: http://localhost:8097",
        ]
    )

    print(f"\nProcessed {processed} channels, skipped {skipped}")

    # Explicit UTF-8 keeps non-ASCII channel names intact regardless of the
    # platform default encoding; the file ends with a newline.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(config) + "\n")

    print(f"Configuration written to {output_file}")
    print("\nTo apply this configuration:")
    print(f" 1. Copy {output_file} to feed-master/etc/fm.yml")
    print(" 2. Restart the feed-master service")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Defaults live next to this script; positional CLI arguments override
    # them: argv[1] = channel list, argv[2] = output file.
    base_dir = Path(__file__).parent
    cli_args = sys.argv[1:]

    channels_file = (
        Path(cli_args[0]) if len(cli_args) >= 1 else base_dir / "channels.yml"
    )
    output_file = (
        Path(cli_args[1])
        if len(cli_args) >= 2
        else base_dir / "feed-master-config" / "fm.yml"
    )

    if not channels_file.exists():
        for message in (
            f"Error: {channels_file} not found",
            f"\nUsage: {sys.argv[0]} [channels.yml] [output.yml]",
        ):
            print(message, file=sys.stderr)
        sys.exit(1)

    # Make sure the destination directory exists before generating.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    generate_fm_config(channels_file, output_file)
|
||||||
88
generate_feed_config_simple.py
Executable file
88
generate_feed_config_simple.py
Executable file
@ -0,0 +1,88 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Generate feed-master configuration from channels.yml.
|
||||||
|
Simplified version that doesn't require RSS-Bridge to be running.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .channel_config import build_rss_bridge_url, load_channel_entries
|
||||||
|
|
||||||
|
|
||||||
|
def generate_fm_config(channels_file, output_file, rss_bridge_host="rss-bridge"):
    """Generate the feed-master YAML configuration from channels.yml.

    Args:
        channels_file: Path of the channel list; wrapped in ``Path`` before
            being handed to ``load_channel_entries``.
        output_file: Path the generated configuration is written to.
        rss_bridge_host: Host name forwarded to ``build_rss_bridge_url`` for
            every channel.

    Channels whose ``rss_enabled`` value is falsy, and channels for which
    ``build_rss_bridge_url`` returns nothing, are counted as skipped and left
    out of the generated file.
    """
    import json  # local import: only used to quote channel names below

    print(f"Reading channels from {channels_file}")
    channels = load_channel_entries(Path(channels_file))
    print(f"Found {len(channels)} channels")

    # Static head of the document: a single aggregated feed definition.
    config = [
        "# Feed Master Configuration",
        "# Auto-generated from channels.yml",
        "# Do not edit manually - regenerate using generate_feed_config_simple.py",
        "",
        "feeds:",
        "  youtube-unified:",
        "    title: YouTube Unified Feed",
        "    description: Aggregated feed from all YouTube channels",
        "    link: https://youtube.com",
        '    language: "en-us"',
        "    sources:",
    ]

    processed = 0
    skipped = 0
    for channel in channels:
        if not channel.get("rss_enabled", True):
            skipped += 1
            continue
        bridge_url = build_rss_bridge_url(channel, rss_bridge_host=rss_bridge_host)
        if not bridge_url:
            skipped += 1
            continue
        name = channel.get("name") or "Unknown"
        # A JSON string literal is also a valid YAML double-quoted scalar, so
        # names containing ':', '#', quotes or leading indicator characters
        # can no longer break the generated document.
        quoted_name = json.dumps(str(name), ensure_ascii=False)
        config.append(f"      - name: {quoted_name}")
        config.append(f"        url: {bridge_url}")
        processed += 1

    # Global feed-master settings.
    config.extend(
        [
            "",
            "system:",
            "  update: 5m",
            "  max_per_feed: 5",
            "  max_total: 200",
            "  max_keep: 1000",
            "  base_url: http://localhost:8097",
        ]
    )

    print(f"\nProcessed {processed} channels, skipped {skipped}")

    # Explicit UTF-8 keeps non-ASCII channel names intact regardless of the
    # platform default encoding; the file ends with a newline.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(config) + "\n")

    print(f"Configuration written to {output_file}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Defaults live next to this script; positional CLI arguments override
    # them: argv[1] = channel list, argv[2] = output file.
    base_dir = Path(__file__).parent
    cli_args = sys.argv[1:]

    channels_file = (
        Path(cli_args[0]) if len(cli_args) >= 1 else base_dir / "channels.yml"
    )
    output_file = (
        Path(cli_args[1])
        if len(cli_args) >= 2
        else base_dir / "feed-master-config" / "fm.yml"
    )

    if not channels_file.exists():
        for message in (
            f"Error: {channels_file} not found",
            f"\nUsage: {sys.argv[0]} [channels.yml] [output.yml]",
        ):
            print(message, file=sys.stderr)
        sys.exit(1)

    # Make sure the destination directory exists before generating.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    generate_fm_config(channels_file, output_file)
|
||||||
262
search_app.py
262
search_app.py
@ -5,6 +5,8 @@ Routes:
|
|||||||
GET / -> static HTML search page.
|
GET / -> static HTML search page.
|
||||||
GET /graph -> static reference graph UI.
|
GET /graph -> static reference graph UI.
|
||||||
GET /api/channels -> channels aggregation.
|
GET /api/channels -> channels aggregation.
|
||||||
|
GET /api/channel-list -> canonical channel list + feed URL.
|
||||||
|
GET /channels.txt -> raw channel URLs list.
|
||||||
GET /api/search -> Elasticsearch keyword search.
|
GET /api/search -> Elasticsearch keyword search.
|
||||||
GET /api/graph -> reference graph API.
|
GET /api/graph -> reference graph API.
|
||||||
GET /api/transcript -> transcript JSON payload.
|
GET /api/transcript -> transcript JSON payload.
|
||||||
@ -17,6 +19,8 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||||
|
|
||||||
@ -25,9 +29,11 @@ from datetime import datetime
|
|||||||
from threading import Lock
|
from threading import Lock
|
||||||
from time import monotonic
|
from time import monotonic
|
||||||
|
|
||||||
|
import requests
|
||||||
from flask import Flask, jsonify, request, send_from_directory
|
from flask import Flask, jsonify, request, send_from_directory
|
||||||
|
|
||||||
from .config import CONFIG, AppConfig
|
from .config import CONFIG, AppConfig
|
||||||
|
from .channel_config import load_channel_entries
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from elasticsearch import Elasticsearch # type: ignore
|
from elasticsearch import Elasticsearch # type: ignore
|
||||||
@ -45,6 +51,10 @@ DEFAULT_ELASTIC_TIMEOUT = int(os.environ.get("ELASTIC_TIMEOUT_SECONDS", "30"))
|
|||||||
|
|
||||||
_RATE_LIMIT_BUCKETS: Dict[str, Deque[float]] = defaultdict(deque)
|
_RATE_LIMIT_BUCKETS: Dict[str, Deque[float]] = defaultdict(deque)
|
||||||
_RATE_LIMIT_LOCK = Lock()
|
_RATE_LIMIT_LOCK = Lock()
|
||||||
|
# Video ID -> (channel name, monotonic timestamp of when it was stored).
_RSS_AUTHOR_CACHE: Dict[str, Tuple[str, float]] = dict()
# Held around reads and writes of _RSS_AUTHOR_CACHE.
_RSS_AUTHOR_LOCK = Lock()
# Cached names older than this many seconds (24 hours) are looked up again.
_RSS_AUTHOR_TTL_SECONDS = 24 * 60 * 60
# Maximum number of YouTube oEmbed requests issued by one lookup call.
_RSS_OEMBED_LIMIT = 12
|
||||||
|
|
||||||
|
|
||||||
def _client_rate_key() -> str:
|
def _client_rate_key() -> str:
|
||||||
@ -101,6 +111,192 @@ def _ensure_client(config: AppConfig) -> "Elasticsearch":
|
|||||||
return Elasticsearch(config.elastic.url, **kwargs)
|
return Elasticsearch(config.elastic.url, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_video_id(url: str) -> Optional[str]:
    """Return the YouTube video ID contained in *url*, or ``None``.

    Recognised URL shapes:

    * ``https://youtu.be/<id>``
    * ``https://[<sub>.]youtube.com/watch?v=<id>``
    * ``https://[<sub>.]youtube.com/shorts/<id>``

    Empty input, unparsable URLs, foreign hosts and URLs without an ID all
    yield ``None``; an empty string is never returned.
    """
    if not url:
        return None
    try:
        parsed = urllib.parse.urlparse(url.strip())
        # ``hostname`` drops any port/userinfo and is already lower-cased.
        host = (parsed.hostname or "").lower()
    except (AttributeError, TypeError, ValueError):
        return None
    path = parsed.path or ""

    if host in {"youtu.be", "www.youtu.be"}:
        # Only the first path segment is the ID ("/<id>/" or "/<id>/x").
        return path.strip("/").split("/", 1)[0] or None

    # Exact domain or a real subdomain; a bare suffix test would also accept
    # look-alike hosts such as "notyoutube.com".
    if host == "youtube.com" or host.endswith(".youtube.com"):
        if path == "/watch":
            params = urllib.parse.parse_qs(parsed.query)
            return (params.get("v") or [None])[0]
        if path.startswith("/shorts/"):
            return path[len("/shorts/"):].split("/", 1)[0] or None
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _lookup_channel_names(
    client: "Elasticsearch",
    index: str,
    video_ids: Iterable[str],
) -> Dict[str, str]:
    """Resolve video IDs to channel names.

    Each ID is resolved from the first source that knows it:

    1. the in-process ``_RSS_AUTHOR_CACHE`` (entries younger than
       ``_RSS_AUTHOR_TTL_SECONDS``),
    2. an Elasticsearch ``mget`` against *index* (``channel_name``, falling
       back to ``channel_id``),
    3. YouTube's oEmbed endpoint, for at most ``_RSS_OEMBED_LIMIT`` IDs per
       call.

    Args:
        client: Elasticsearch client used for the ``mget`` request.
        index: Index the video documents are fetched from.
        video_ids: Candidate IDs; falsy values and duplicates are dropped.

    Returns:
        Mapping of video ID to channel name for every ID that could be
        resolved. Unresolved IDs are simply absent.
    """
    # De-duplicate while keeping the caller's order.
    ids = list(dict.fromkeys(vid for vid in video_ids if vid))
    if not ids:
        return {}

    now = monotonic()
    mapping: Dict[str, str] = {}
    remaining: List[str] = []
    with _RSS_AUTHOR_LOCK:
        for vid in ids:
            cached = _RSS_AUTHOR_CACHE.get(vid)
            if cached and (now - cached[1]) < _RSS_AUTHOR_TTL_SECONDS:
                mapping[vid] = cached[0]
            else:
                remaining.append(vid)
    cached_hits = len(mapping)

    def _remember(entries: Dict[str, str]) -> None:
        # One lock acquisition per batch instead of one per entry.
        if not entries:
            return
        with _RSS_AUTHOR_LOCK:
            for key, value in entries.items():
                _RSS_AUTHOR_CACHE[key] = (value, now)

    elastic_hits = 0
    if remaining:
        try:
            es_response = client.mget(index=index, body={"ids": remaining})
        except Exception as exc:  # pragma: no cover - elasticsearch handles errors
            LOGGER.warning("RSS title lookup failed: %s", exc)
            es_response = {}
        from_elastic: Dict[str, str] = {}
        for doc in es_response.get("docs", []):
            if not doc.get("found"):
                continue
            source = doc.get("_source") or {}
            name = source.get("channel_name") or source.get("channel_id")
            vid = doc.get("_id")
            if name and vid:
                from_elastic[vid] = str(name)
        elastic_hits = len(from_elastic)
        mapping.update(from_elastic)
        _remember(from_elastic)

    # Last resort: ask YouTube directly, bounded so a single request cannot
    # fan out into an unbounded number of outbound calls.
    missing = [vid for vid in remaining if vid not in mapping]
    oembed_attempts = 0
    from_oembed: Dict[str, str] = {}
    for vid in missing[:_RSS_OEMBED_LIMIT]:
        oembed_attempts += 1
        video_url = f"https://www.youtube.com/watch?v={vid}"
        oembed_url = (
            "https://www.youtube.com/oembed?format=json&url="
            + urllib.parse.quote(video_url, safe="")
        )
        try:
            reply = requests.get(oembed_url, timeout=10)
            if reply.status_code != 200:
                continue
            data = reply.json()
        except (requests.RequestException, ValueError) as exc:
            # Best effort only, but leave a trace for debugging.
            LOGGER.debug("oEmbed lookup failed for %s: %s", vid, exc)
            continue
        # A non-object JSON body has no "author_name"; treat it as a miss.
        author = data.get("author_name") if isinstance(data, dict) else None
        if not author:
            continue
        from_oembed[vid] = str(author)
    oembed_hits = len(from_oembed)
    mapping.update(from_oembed)
    _remember(from_oembed)

    missing_count = max(len(ids) - cached_hits - elastic_hits - oembed_hits, 0)
    if missing_count or oembed_attempts:
        LOGGER.info(
            "RSS title lookup: total=%d cached=%d elastic=%d oembed=%d missing=%d",
            len(ids),
            cached_hits,
            elastic_hits,
            oembed_hits,
            missing_count,
        )
    else:
        LOGGER.debug(
            "RSS title lookup: total=%d cached=%d elastic=%d",
            len(ids),
            cached_hits,
            elastic_hits,
        )

    return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def _preserve_rss_namespaces(content: bytes) -> None:
    """Register the namespace prefixes declared in *content* with ElementTree.

    ElementTree serialises namespaces it has no registered prefix for as
    ``ns0``, ``ns1``, ...; registering the document's own prefixes keeps them
    unchanged in the rewritten feed. Registration is process-global.
    """
    import io  # local import: only needed for the namespace scan

    try:
        for _event, (prefix, uri) in ET.iterparse(
            io.BytesIO(content), events=("start-ns",)
        ):
            # Default namespaces are left alone, and "ns<digits>" is reserved
            # by ElementTree (register_namespace would raise ValueError).
            if prefix and not re.fullmatch(r"ns\d+", prefix):
                ET.register_namespace(prefix, uri)
    except ET.ParseError:
        return


def _rss_item_video_id(item: ET.Element) -> Optional[str]:
    """Return the video ID from an item's ``<link>``, else from its ``<guid>``."""
    for tag in ("link", "guid"):
        element = item.find(tag)
        video_id = _extract_video_id((element.text or "") if element is not None else "")
        if video_id:
            return video_id
    return None


def _rewrite_rss_payload(
    content: bytes,
    client: "Elasticsearch",
    index: str,
    feed_name: str,
) -> bytes:
    """Clean up an RSS document and prefix item titles with channel names.

    Items whose title contains ``"Bridge returned error"`` are removed. For
    the remaining items the video ID is read from ``<link>``/``<guid>``, the
    channel name is resolved via ``_lookup_channel_names`` and the title is
    rewritten to ``"<channel name> - <title>"`` unless it already starts with
    that prefix.

    Args:
        content: Raw RSS document.
        client: Elasticsearch client passed on to the name lookup.
        index: Elasticsearch index passed on to the name lookup.
        feed_name: Feed label used in log messages only.

    Returns:
        The re-serialised UTF-8 document, or *content* unchanged when it is
        not well-formed XML or has no ``<channel>`` element.
    """
    try:
        root = ET.fromstring(content)
    except ET.ParseError:
        LOGGER.warning("RSS rewrite skipped (invalid XML) for %s", feed_name)
        return content

    channel = root.find("channel")
    if channel is None:
        LOGGER.warning("RSS rewrite skipped (missing channel) for %s", feed_name)
        return content

    _preserve_rss_namespaces(content)

    items = channel.findall("item")
    total_items = len(items)
    removed_errors = 0
    # Insertion-ordered, so IDs reach the lookup in feed order.
    video_ids: Dict[str, None] = {}
    # (title element, video ID) for every item whose title can be prefixed.
    candidates: List[Tuple[ET.Element, str]] = []
    for item in items:
        title_el = item.find("title")
        title_text = (title_el.text or "").strip() if title_el is not None else ""
        if "Bridge returned error" in title_text:
            channel.remove(item)
            removed_errors += 1
            continue
        video_id = _rss_item_video_id(item)
        if not video_id:
            continue
        video_ids[video_id] = None
        if title_el is not None and title_el.text:
            candidates.append((title_el, video_id))

    channel_name_map = _lookup_channel_names(client, index, list(video_ids))
    if not channel_name_map:
        LOGGER.info(
            "RSS rewrite: feed=%s items=%d removed_errors=%d resolved=0",
            feed_name,
            total_items,
            removed_errors,
        )
        return ET.tostring(root, encoding="utf-8", xml_declaration=True)

    prefixed = 0
    for title_el, video_id in candidates:
        channel_name = channel_name_map.get(video_id)
        if not channel_name:
            continue
        prefix = f"{channel_name} - "
        if title_el.text.startswith(prefix):
            continue  # already prefixed
        title_el.text = f"{channel_name} - {title_el.text}"
        prefixed += 1

    LOGGER.info(
        "RSS rewrite: feed=%s items=%d removed_errors=%d prefixed=%d resolved=%d",
        feed_name,
        total_items,
        removed_errors,
        prefixed,
        len(channel_name_map),
    )
    return ET.tostring(root, encoding="utf-8", xml_declaration=True)
|
||||||
|
|
||||||
|
|
||||||
def metrics_payload(data_root: Path, include_external: bool = True) -> Dict[str, Any]:
|
def metrics_payload(data_root: Path, include_external: bool = True) -> Dict[str, Any]:
|
||||||
total_items = 0
|
total_items = 0
|
||||||
channel_counter: Counter = Counter()
|
channel_counter: Counter = Counter()
|
||||||
@ -1077,6 +1273,72 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
data.sort(key=lambda item: item["Name"].lower())
|
data.sort(key=lambda item: item["Name"].lower())
|
||||||
return jsonify(data)
|
return jsonify(data)
|
||||||
|
|
||||||
|
@app.route("/api/channel-list")
def channel_list():
    """Return the configured channels plus the RSS feed URL as JSON.

    The response always carries ``channels``, ``rss_feed_url`` and ``source``;
    when the channel file cannot be loaded, ``channels`` stays empty and an
    ``error`` code is added.
    """
    payload = {
        "channels": [],
        "rss_feed_url": config.rss_feed_url,
        "source": str(config.channels_path),
    }
    failure = None
    try:
        entries = load_channel_entries(config.channels_path)
    except FileNotFoundError:
        LOGGER.warning("Channel list not found: %s", config.channels_path)
        failure = "channels_not_found"
    except Exception as exc:
        LOGGER.exception("Failed to load channel list: %s", exc)
        failure = "channels_load_failed"
    else:
        payload["channels"] = entries

    if failure is not None:
        payload["error"] = failure
    return jsonify(payload)
|
||||||
|
|
||||||
|
@app.route("/channels.txt")
def channel_urls():
    """Serve the channel URLs as plain text, one per line.

    Responds with a JSON error and status 404 when the channel file is
    missing, or 500 when it cannot be loaded for any other reason.
    """
    try:
        entries = load_channel_entries(config.channels_path)
    except FileNotFoundError:
        LOGGER.warning("Channel list not found: %s", config.channels_path)
        return jsonify({"error": "channels_not_found"}), 404
    except Exception as exc:
        LOGGER.exception("Failed to load channel list: %s", exc)
        return jsonify({"error": "channels_load_failed"}), 500

    # Entries without a URL are left out.
    urls = []
    for entry in entries:
        if entry.get("url"):
            urls.append(entry["url"])

    body = "\n".join(urls)
    if urls:
        body += "\n"  # newline-terminate a non-empty list
    return (body, 200, {"Content-Type": "text/plain; charset=utf-8"})
|
||||||
|
|
||||||
|
def _rss_target(feed_name: str) -> str:
    """Build the upstream feed URL for *feed_name*.

    Args:
        feed_name: Feed name taken from the request path; surrounding slashes
            are ignored and an empty name selects ``youtube-unified``.

    Returns:
        ``<config.rss_feed_upstream>/rss/<name>`` with the name
        percent-encoded as a single path segment.
    """
    name = (feed_name or "").strip("/")
    # The name comes straight from the client. A bare dot segment would be
    # collapsed by URL normalisation, so fall back to the default feed.
    if not name or name in {".", ".."}:
        name = "youtube-unified"
    # Encode everything, including "/", "?" and "#", so the value cannot add
    # path segments or a query string to the upstream request.
    safe_name = urllib.parse.quote(name, safe="")
    return f"{config.rss_feed_upstream.rstrip('/')}/rss/{safe_name}"
|
||||||
|
|
||||||
|
@app.route("/rss")
@app.route("/rss/<path:feed_name>")
def rss_proxy(feed_name: str = ""):
    """Fetch a feed from the upstream RSS service and relay it.

    The upstream body is passed through ``_rewrite_rss_payload`` before it
    is returned. The upstream status code is relayed as-is, together with
    its ``Content-Type`` (defaulting to XML) and, when present, its
    ``Cache-Control``, ``ETag`` and ``Last-Modified`` headers. A transport
    failure talking to the upstream yields a JSON error with status 502.
    """
    target = _rss_target(feed_name)
    try:
        upstream = requests.get(target, timeout=30)
    except requests.RequestException as exc:
        LOGGER.warning("RSS upstream error for %s: %s", target, exc)
        return jsonify({"error": "rss_unavailable"}), 502

    body = _rewrite_rss_payload(upstream.content, client, index, feed_name or "rss")

    response_headers = {
        "Content-Type": upstream.headers.get(
            "Content-Type", "application/xml; charset=UTF-8"
        )
    }
    # Relay caching/validation headers only when the upstream supplied them.
    for header_name in ("Cache-Control", "ETag", "Last-Modified"):
        header_value = upstream.headers.get(header_name)
        if header_value:
            response_headers[header_name] = header_value

    return (body, upstream.status_code, response_headers)
|
||||||
|
|
||||||
@app.route("/api/graph")
|
@app.route("/api/graph")
|
||||||
def graph_api():
|
def graph_api():
|
||||||
video_id = (request.args.get("video_id") or "").strip()
|
video_id = (request.args.get("video_id") or "").strip()
|
||||||
|
|||||||
@ -45,6 +45,10 @@
|
|||||||
const aboutBtn = document.getElementById("aboutBtn");
|
const aboutBtn = document.getElementById("aboutBtn");
|
||||||
const aboutPanel = document.getElementById("aboutPanel");
|
const aboutPanel = document.getElementById("aboutPanel");
|
||||||
const aboutCloseBtn = document.getElementById("aboutCloseBtn");
|
const aboutCloseBtn = document.getElementById("aboutCloseBtn");
|
||||||
|
const rssButton = document.getElementById("rssButton");
|
||||||
|
const rssFeedLink = document.getElementById("rssFeedLink");
|
||||||
|
const channelListLink = document.getElementById("channelListLink");
|
||||||
|
const channelCount = document.getElementById("channelCount");
|
||||||
const resultsDiv = document.getElementById("results");
|
const resultsDiv = document.getElementById("results");
|
||||||
const metaDiv = document.getElementById("meta");
|
const metaDiv = document.getElementById("meta");
|
||||||
const metricsContainer = document.getElementById("metrics");
|
const metricsContainer = document.getElementById("metrics");
|
||||||
@ -406,6 +410,57 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch /api/channel-list and update the RSS button, the About-panel feed
 * link, the channel-list link and the channel count.
 *
 * Any failure (network error, non-2xx status, invalid JSON) is logged and
 * leaves the feed link/button in their "unavailable" state.
 */
async function loadChannelListInfo() {
  // Skip the request only when none of the elements it updates exist.
  // rssButton is part of the check so a page with just the button still works.
  if (!rssFeedLink && !rssButton && !channelListLink && !channelCount) return;

  // Point the feed link and button at feedUrl; an empty value disables both.
  const applyFeedUrl = (feedUrl) => {
    if (rssFeedLink) {
      if (feedUrl) {
        rssFeedLink.href = feedUrl;
        rssFeedLink.textContent = feedUrl;
      } else {
        rssFeedLink.textContent = "Unavailable";
        rssFeedLink.removeAttribute("href");
      }
    }
    if (rssButton) {
      if (feedUrl) {
        rssButton.href = feedUrl;
        rssButton.classList.remove("is-disabled");
        rssButton.removeAttribute("aria-disabled");
      } else {
        rssButton.removeAttribute("href");
        rssButton.classList.add("is-disabled");
        rssButton.setAttribute("aria-disabled", "true");
      }
    }
  };

  try {
    const res = await fetch("/api/channel-list");
    // A gateway/error page is not a usable payload; handle it in catch.
    if (!res.ok) {
      throw new Error(`Unexpected response status ${res.status}`);
    }
    const payload = await res.json();

    applyFeedUrl(payload.rss_feed_url || "");

    if (channelCount) {
      const count = Array.isArray(payload.channels) ? payload.channels.length : 0;
      if (count) {
        channelCount.textContent = `${count} ${count === 1 ? "channel" : "channels"}`;
      } else {
        channelCount.textContent = "No channels loaded";
      }
    }
    // The endpoint reports load problems via an "error" field in the payload.
    if (channelListLink && payload.error) {
      channelListLink.textContent = "Channel list unavailable";
    }
  } catch (err) {
    console.error("Failed to load channel list", err);
    applyFeedUrl("");
    if (channelCount) {
      channelCount.textContent = "Channel list unavailable";
    }
  }
}
|
||||||
|
|
||||||
function updateUrl(q) {
|
function updateUrl(q) {
|
||||||
const next = new URL(window.location.href);
|
const next = new URL(window.location.href);
|
||||||
if (q) {
|
if (q) {
|
||||||
@ -1732,6 +1787,7 @@ window.addEventListener("popstate", () => {
|
|||||||
setFromQuery();
|
setFromQuery();
|
||||||
loadMetrics();
|
loadMetrics();
|
||||||
loadYears();
|
loadYears();
|
||||||
|
loadChannelListInfo();
|
||||||
loadChannels().then(() => runSearch(currentPage));
|
loadChannels().then(() => runSearch(currentPage));
|
||||||
})();
|
})();
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,22 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="window-body">
|
<div class="window-body">
|
||||||
|
<div class="window-actions">
|
||||||
|
<a
|
||||||
|
id="rssButton"
|
||||||
|
class="rss-button"
|
||||||
|
href="/rss"
|
||||||
|
target="_blank"
|
||||||
|
rel="noopener"
|
||||||
|
title="Unified RSS feed"
|
||||||
|
aria-label="Unified RSS feed"
|
||||||
|
>
|
||||||
|
<svg class="rss-button__icon" viewBox="0 0 24 24" aria-hidden="true">
|
||||||
|
<path d="M6 18a2 2 0 1 0 0 4a2 2 0 0 0 0-4zm-4 6a4 4 0 0 1 4-4a4 4 0 0 1 4 4h-2a2 2 0 0 0-2-2a2 2 0 0 0-2 2zm0-8v-2c6.627 0 12 5.373 12 12h-2c0-5.523-4.477-10-10-10zm0-4V4c11.046 0 20 8.954 20 20h-2c0-9.941-8.059-18-18-18z"/>
|
||||||
|
</svg>
|
||||||
|
<span class="rss-button__label">RSS</span>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
<p>Enter a phrase to query title, description, and transcript text.</p>
|
<p>Enter a phrase to query title, description, and transcript text.</p>
|
||||||
|
|
||||||
<fieldset>
|
<fieldset>
|
||||||
@ -129,6 +145,15 @@
|
|||||||
<p>Use the toggles to choose exact, fuzzy, or phrase matching. Query string mode accepts raw Lucene syntax.</p>
|
<p>Use the toggles to choose exact, fuzzy, or phrase matching. Query string mode accepts raw Lucene syntax.</p>
|
||||||
<p>Results are ranked by your chosen sort order; the timeline summarizes the same query.</p>
|
<p>Results are ranked by your chosen sort order; the timeline summarizes the same query.</p>
|
||||||
<p>You can download transcripts, copy MLA citations, or explore references via the graph button.</p>
|
<p>You can download transcripts, copy MLA citations, or explore references via the graph button.</p>
|
||||||
|
<div class="about-panel__section">
|
||||||
|
<div class="about-panel__label">Unified RSS feed</div>
|
||||||
|
<a id="rssFeedLink" href="#" target="_blank" rel="noopener">Loading…</a>
|
||||||
|
</div>
|
||||||
|
<div class="about-panel__section">
|
||||||
|
<div class="about-panel__label">Channel list</div>
|
||||||
|
<a id="channelListLink" href="/api/channel-list" target="_blank" rel="noopener">View JSON</a>
|
||||||
|
<div id="channelCount" class="about-panel__meta"></div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@ -510,6 +510,22 @@ body.modal-open {
|
|||||||
color: #000;
|
color: #000;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.about-panel__section {
|
||||||
|
margin-top: 8px;
|
||||||
|
padding-top: 6px;
|
||||||
|
border-top: 1px solid #c0c0c0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.about-panel__label {
|
||||||
|
font-weight: bold;
|
||||||
|
margin-bottom: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.about-panel__meta {
|
||||||
|
font-size: 10px;
|
||||||
|
color: #555;
|
||||||
|
}
|
||||||
|
|
||||||
.about-panel__header button {
|
.about-panel__header button {
|
||||||
border: none;
|
border: none;
|
||||||
background: transparent;
|
background: transparent;
|
||||||
@ -549,6 +565,50 @@ body.modal-open {
|
|||||||
box-sizing: border-box;
|
box-sizing: border-box;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.window-actions {
|
||||||
|
display: flex;
|
||||||
|
justify-content: flex-end;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rss-button {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 4px;
|
||||||
|
padding: 2px 6px;
|
||||||
|
border: 1px solid;
|
||||||
|
border-color: ButtonHighlight ButtonShadow ButtonShadow ButtonHighlight;
|
||||||
|
background: ButtonFace;
|
||||||
|
color: #000;
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 11px;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rss-button:hover {
|
||||||
|
background: #f3f3f3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rss-button:active {
|
||||||
|
border-color: ButtonShadow ButtonHighlight ButtonHighlight ButtonShadow;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rss-button.is-disabled {
|
||||||
|
opacity: 0.5;
|
||||||
|
cursor: default;
|
||||||
|
pointer-events: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rss-button__icon {
|
||||||
|
width: 14px;
|
||||||
|
height: 14px;
|
||||||
|
fill: #f38b00;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rss-button__label {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
/* Badges */
|
/* Badges */
|
||||||
.badge-row {
|
.badge-row {
|
||||||
margin-top: 6px;
|
margin-top: 6px;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user