# TLC-Search/channel_config.py

from __future__ import annotations

import json
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import quote

# Captures the path segment after "youtube.com/channel/" (scheme and "www."
# are optional); the capture stops at the next "/", "?" or "#".
_CHANNEL_ID_PATTERN = re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/channel/([^/?#]+)")
# Captures the text after "youtube.com/@" (the "@" itself is not captured);
# the capture stops at the next "/", "?" or "#".
_HANDLE_PATTERN = re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/@([^/?#]+)")


def _strip_quotes(value: str) -> str:
    if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
        return value[1:-1]
    return value


def _parse_yaml_channels(text: str) -> List[Dict[str, str]]:
    channels: List[Dict[str, str]] = []
    current: Dict[str, str] = {}

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line == "channels:":
            continue
        if line.startswith("- "):
            if current:
                channels.append(current)
            current = {}
            line = line[2:].strip()
        if not line:
            continue
        if ":" not in line:
            continue
        key, value = line.split(":", 1)
        current[key.strip()] = _strip_quotes(value.strip())

    if current:
        channels.append(current)
    return channels


def _extract_from_url(url: str) -> Dict[str, Optional[str]]:
    """Pull a channel ID and/or handle out of a YouTube URL.

    Returns a dict with the keys "id" and "handle"; each value is the first
    capture group of the corresponding module-level pattern, or ``None`` when
    that pattern does not occur in *url*.
    """
    id_hit = _CHANNEL_ID_PATTERN.search(url)
    handle_hit = _HANDLE_PATTERN.search(url)
    return {
        "id": id_hit.group(1) if id_hit else None,
        "handle": handle_hit.group(1) if handle_hit else None,
    }


def _normalize_handle(handle: Optional[str]) -> Optional[str]:
    if not handle:
        return None
    return handle.lstrip("@").strip() or None


def _parse_bool(value: Optional[object]) -> Optional[bool]:
    if isinstance(value, bool):
        return value
    if value is None:
        return None
    text = str(value).strip().lower()
    if text in {"1", "true", "yes", "y"}:
        return True
    if text in {"0", "false", "no", "n"}:
        return False
    return None


def _normalize_entry(entry: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Convert one raw channel mapping into the canonical entry shape.

    Recognized keys (first alias that yields a value wins):
      * channel ID: "id", "channel_id"
      * handle: "handle", "username" (cleaned with ``_normalize_handle``)
      * "url", "name"
      * RSS flag: "rss_enabled", "rss", "include_in_feed"

    When a URL is given, a missing ID or handle is filled in from it via
    ``_extract_from_url``. When no URL is given, one is built from the ID
    (preferred) or the handle. A missing name falls back to the handle, then
    the ID.

    Args:
        entry: Raw key/value mapping for a single channel.

    Returns:
        A dict with the keys "id", "handle", "name", "url" and "rss_enabled",
        or ``None`` when neither a name nor a URL can be determined.
        "rss_enabled" is ``True`` unless one of the RSS keys parses to a
        boolean.
    """
    channel_id = entry.get("id") or entry.get("channel_id")
    handle = _normalize_handle(entry.get("handle") or entry.get("username"))
    url = entry.get("url")
    name = entry.get("name")

    # Parse each alias separately instead of chaining the raw values with
    # `or`: a real boolean False is falsy, so the chain skipped it and an
    # explicit `"rss_enabled": false` silently turned into the True default.
    rss_flag: Optional[bool] = None
    for key in ("rss_enabled", "rss", "include_in_feed"):
        rss_flag = _parse_bool(entry.get(key))
        if rss_flag is not None:
            break

    if url:
        extracted = _extract_from_url(url)
        channel_id = channel_id or extracted.get("id")
        handle = handle or extracted.get("handle")
    elif channel_id:
        url = f"https://www.youtube.com/channel/{channel_id}"
    elif handle:
        url = f"https://www.youtube.com/@{handle}"

    name = name or handle or channel_id
    if not name or not url:
        return None

    return {
        "id": channel_id or "",
        "handle": handle or "",
        "name": name,
        "url": url,
        "rss_enabled": True if rss_flag is None else rss_flag,
    }


def load_channel_entries(path: Path) -> List[Dict[str, Any]]:
    """Load, normalize and sort the channel entries stored in *path*.

    Files with a ".json" suffix (case-insensitive) are parsed as JSON: either
    an object whose "channels" key holds the list, or a bare list. Every other
    file is read with the module's minimal ``_parse_yaml_channels`` reader.

    Each mapping is cleaned (``None`` values dropped, keys and non-boolean
    values converted to stripped strings) and passed to ``_normalize_entry``;
    items that are not mappings, or that cannot be normalized, are skipped.

    Args:
        path: Location of the channel configuration file.

    Returns:
        Normalized entries sorted case-insensitively by "name". The values are
        strings except "rss_enabled", which is a bool.

    Raises:
        FileNotFoundError: If *path* does not exist.
        json.JSONDecodeError: If a ".json" file contains invalid JSON.
    """
    if not path.exists():
        raise FileNotFoundError(path)

    text = path.read_text(encoding="utf-8")
    if path.suffix.lower() == ".json":
        payload = json.loads(text)
        raw_entries = payload.get("channels") if isinstance(payload, dict) else payload
    else:
        raw_entries = _parse_yaml_channels(text)

    # `{"channels": null}` or a scalar top-level payload used to raise
    # TypeError in the loop below; treat anything that is not a list as
    # "no entries", in line with how malformed items are skipped.
    if not isinstance(raw_entries, list):
        raw_entries = []

    entries: List[Dict[str, Any]] = []
    for raw in raw_entries:
        if not isinstance(raw, dict):
            continue
        # Booleans are kept as-is so JSON true/false flags survive; everything
        # else is compared as trimmed text downstream.
        cleaned: Dict[str, Any] = {
            str(key).strip(): value if isinstance(value, bool) else str(value).strip()
            for key, value in raw.items()
            if value is not None
        }
        normalized = _normalize_entry(cleaned)
        if normalized:
            entries.append(normalized)

    entries.sort(key=lambda item: item["name"].lower())
    return entries


def build_rss_bridge_url(entry: Dict[str, str], rss_bridge_host: str = "rss-bridge") -> Optional[str]:
    """Build the RSS-Bridge YoutubeBridge feed URL for a channel entry.

    The channel ID (key "id") is preferred; the handle (key "handle", cleaned
    with ``_normalize_handle``) is only used when no ID is present.

    Args:
        entry: Channel entry mapping; only "id" and "handle" are read.
        rss_bridge_host: Host (optionally with port) of the RSS-Bridge
            instance, inserted verbatim after "http://".

    Returns:
        The feed URL, or ``None`` when the entry has neither an ID nor a
        handle.
    """
    channel_id = entry.get("id") or ""
    if channel_id:
        # Percent-encode the value so characters such as "&", "=", spaces or
        # non-ASCII text cannot break the query string. "%" is kept as-is so a
        # value that is already percent-encoded is not encoded twice.
        encoded_id = quote(str(channel_id), safe="%")
        return (
            f"http://{rss_bridge_host}/?action=display&bridge=YoutubeBridge"
            f"&context=By+channel+id&c={encoded_id}&format=Mrss"
        )

    # Only look at the handle when the ID is missing; it is not needed otherwise.
    handle = _normalize_handle(entry.get("handle"))
    if handle:
        encoded_handle = quote(handle, safe="%")
        return (
            f"http://{rss_bridge_host}/?action=display&bridge=YoutubeBridge"
            f"&context=By+username&u={encoded_handle}&format=Mrss"
        )
    return None