TLC-Search/channel_config.py
2026-01-08 22:53:30 -05:00

163 lines
4.7 KiB
Python

from __future__ import annotations
import json
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
# Captures the channel ID from URLs shaped like youtube.com/channel/<id>.
# The scheme and the "www." prefix are optional; the capture stops at the
# first "/", "?" or "#".
_CHANNEL_ID_PATTERN = re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/channel/([^/?#]+)")
# Captures the handle (without the leading "@") from URLs shaped like
# youtube.com/@<handle>, with the same optional prefixes and terminators.
_HANDLE_PATTERN = re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/@([^/?#]+)")
def _strip_quotes(value: str) -> str:
if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
return value[1:-1]
return value
def _parse_yaml_channels(text: str) -> List[Dict[str, str]]:
    """Parse a minimal YAML-like channel list into a list of flat dicts.

    This is a deliberately small line-based parser, not a YAML implementation.
    Indentation is ignored. Blank lines, lines starting with ``#`` and the
    literal ``channels:`` header are skipped. A line starting with ``-``
    begins a new entry; every other line containing ``:`` is treated as a
    ``key: value`` pair of the current entry. Values have one pair of
    surrounding quotes removed.

    Args:
        text: The raw file contents.

    Returns:
        One dict per non-empty entry, in file order.
    """
    channels: List[Dict[str, str]] = []
    current: Dict[str, str] = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line == "channels:":
            continue
        # A list marker may stand alone on its line ("-") with the keys on the
        # following lines; it must still start a new entry, otherwise those
        # keys would overwrite the previous entry's values.
        if line == "-" or line.startswith("- "):
            if current:
                channels.append(current)
            current = {}
            line = line[1:].strip()
            if not line:
                continue
        if ":" not in line:
            continue
        # Split on the first colon only so URL values keep their "://".
        key, value = line.split(":", 1)
        current[key.strip()] = _strip_quotes(value.strip())
    if current:
        channels.append(current)
    return channels
def _extract_from_url(url: str) -> Dict[str, Optional[str]]:
    """Pull the channel ID and/or handle out of a YouTube channel URL.

    Returns a dict with the keys ``"id"`` and ``"handle"``; each value is the
    captured text, or ``None`` when the corresponding URL form does not match.
    """
    id_hit = _CHANNEL_ID_PATTERN.search(url)
    handle_hit = _HANDLE_PATTERN.search(url)
    return {
        "id": id_hit.group(1) if id_hit else None,
        "handle": handle_hit.group(1) if handle_hit else None,
    }
def _normalize_handle(handle: Optional[str]) -> Optional[str]:
if not handle:
return None
return handle.lstrip("@").strip() or None
def _parse_bool(value: Optional[object]) -> Optional[bool]:
if isinstance(value, bool):
return value
if value is None:
return None
text = str(value).strip().lower()
if text in {"1", "true", "yes", "y"}:
return True
if text in {"0", "false", "no", "n"}:
return False
return None
def _normalize_entry(entry: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Turn one raw channel mapping into the canonical entry shape.

    Recognised keys: ``id``/``channel_id``, ``handle``/``username``, ``url``,
    ``name`` and the RSS flag aliases ``rss_enabled``/``rss``/
    ``include_in_feed``. A missing ID or handle is filled in from ``url``
    when it matches a known YouTube URL form; a missing ``url`` is built from
    the ID (preferred) or the handle; a missing ``name`` falls back to the
    handle, then the ID.

    Args:
        entry: The raw key/value mapping for a single channel.

    Returns:
        A dict with the keys ``id``, ``handle``, ``name``, ``url`` and
        ``rss_enabled``, or ``None`` when neither a name nor a URL can be
        determined. ``rss_enabled`` defaults to ``True`` when no flag parses.
    """
    channel_id = entry.get("id") or entry.get("channel_id")
    handle = _normalize_handle(entry.get("handle") or entry.get("username"))
    url = entry.get("url")
    name = entry.get("name")

    # The first alias that parses to a definite boolean wins. An ``or`` chain
    # over the raw values must not be used here: it would discard an explicit
    # ``False`` (e.g. JSON ``"rss_enabled": false``) and fall through to the
    # default of True.
    rss_flag: Optional[bool] = None
    for key in ("rss_enabled", "rss", "include_in_feed"):
        rss_flag = _parse_bool(entry.get(key))
        if rss_flag is not None:
            break

    if url:
        # Explicit id/handle values take precedence over what the URL implies.
        extracted = _extract_from_url(url)
        channel_id = channel_id or extracted.get("id")
        handle = handle or extracted.get("handle")
    elif channel_id:
        url = f"https://www.youtube.com/channel/{channel_id}"
    elif handle:
        url = f"https://www.youtube.com/@{handle}"

    if not name:
        name = handle or channel_id
    if not name or not url:
        return None

    return {
        "id": channel_id or "",
        "handle": handle or "",
        "name": name,
        "url": url,
        "rss_enabled": True if rss_flag is None else rss_flag,
    }
def load_channel_entries(path: Path) -> List[Dict[str, str]]:
    """Load, normalise and sort the channel entries stored at *path*.

    Files with a ``.json`` suffix (case-insensitive) are parsed as JSON and
    may be either a list of entries or an object with a ``channels`` list.
    Every other file is parsed with the module's minimal YAML-like parser.
    Entries that are not mappings, or that cannot be normalised, are skipped.

    Args:
        path: Location of the channel configuration file.

    Returns:
        Normalised entries sorted case-insensitively by ``name``. Note that
        each entry's ``rss_enabled`` value is a bool, not a string.

    Raises:
        FileNotFoundError: If *path* does not exist.
        json.JSONDecodeError: If a ``.json`` file contains invalid JSON.
    """
    if not path.exists():
        raise FileNotFoundError(path)

    text = path.read_text(encoding="utf-8")
    if path.suffix.lower() == ".json":
        payload = json.loads(text)
        raw_entries = payload.get("channels") if isinstance(payload, dict) else payload
    else:
        raw_entries = _parse_yaml_channels(text)

    # Tolerate ``"channels": null`` or a scalar payload instead of failing
    # with a TypeError while iterating.
    if not isinstance(raw_entries, list):
        raw_entries = []

    entries: List[Dict[str, str]] = []
    for raw in raw_entries:
        if not isinstance(raw, dict):
            continue
        # Drop null values, keep real booleans and stringify everything else
        # so the normaliser sees trimmed text.
        raw_payload: Dict[str, Any] = {}
        for key, value in raw.items():
            if value is None:
                continue
            clean_key = str(key).strip()
            if isinstance(value, bool):
                raw_payload[clean_key] = value
            else:
                raw_payload[clean_key] = str(value).strip()
        normalized = _normalize_entry(raw_payload)
        if normalized:
            entries.append(normalized)

    entries.sort(key=lambda item: item["name"].lower())
    return entries
def build_rss_bridge_url(entry: Dict[str, str], rss_bridge_host: str = "rss-bridge") -> Optional[str]:
    """Build the RSS-Bridge YoutubeBridge feed URL for a channel entry.

    The channel ID is preferred; the handle is used only when no ID is set.

    Args:
        entry: A channel entry; its ``id`` and ``handle`` keys are read.
        rss_bridge_host: Host (optionally ``host:port``) of the RSS-Bridge
            instance, inserted verbatim after ``http://``.

    Returns:
        The feed URL, or ``None`` when the entry has neither an ID nor a
        handle.
    """
    # Imported locally so this function does not rely on a module-level import.
    from urllib.parse import quote

    base = f"http://{rss_bridge_host}/?action=display&bridge=YoutubeBridge"

    channel_id = entry.get("id") or ""
    if channel_id:
        # Percent-encode so reserved or non-ASCII characters cannot corrupt
        # the query string; ordinary IDs pass through unchanged.
        encoded_id = quote(str(channel_id), safe="")
        return f"{base}&context=By+channel+id&c={encoded_id}&format=Mrss"

    handle = _normalize_handle(entry.get("handle"))
    if handle:
        encoded_handle = quote(str(handle), safe="")
        return f"{base}&context=By+username&u={encoded_handle}&format=Mrss"

    return None