"""Recipe helpers backed by the vault's structured Recipe Library.

Recipes live as one structured note per file under ``Recipes/<Cookbook>/`` (and
``Recipes/Web/``), following ``Templates/Recipe.md``: YAML frontmatter, then
``## Ingredients`` (bullets) and ``## Steps`` (numbered). We read those notes
directly — no web scraping needed.

Legacy web links in the flat ``Recipes.md`` are still honored as a fallback so
older saved recipes stay reachable; those are fetched with ``primp`` (browser
impersonation, since sites like Serious Eats bot-block plain HTTP) and parsed
from the page's schema.org/Recipe JSON-LD.
"""

import json
import re
from datetime import date
from pathlib import Path
from urllib.parse import urlparse

import primp

import vault

# Vault-relative locations used by this module.
# RECIPES_DIR is the root scanned (recursively) for structured recipe notes,
# WEB_DIR is where import_web_recipe() writes imported notes, and LEGACY_FILE
# is the flat file of markdown links that is still honored as a fallback.
RECIPES_DIR = "Recipes"
WEB_DIR = "Recipes/Web"
LEGACY_FILE = "Recipes.md"

# Markdown bullet link ``- [title](http(s)://...)``; captures (title, url).
_LINK_RE = re.compile(r"-\s*\[([^\]]+)\]\((https?://[^)]+)\)")
# Body of each <script> tag whose attributes mention application/ld+json.
# re.S lets the body span lines; re.I makes the tag/attribute match
# case-insensitive.
_LDJSON_RE = re.compile(r'<script[^>]*application/ld\+json[^>]*>(.*?)</script>', re.S | re.I)
# Frontmatter fenced by ``---`` lines. re.M is not set, so ``^`` only matches
# at the very start of the text; the group captures the frontmatter body.
_FM_RE = re.compile(r"^---\n(.*?)\n---", re.S)


def _iter_recipe_files() -> list[Path]:
    """Return every recipe note under the vault's recipe folder, sorted by path.

    Files named ``readme.md`` and anything with a path component called
    ``_inbox`` are skipped (both comparisons are case-insensitive). An absent
    recipe folder yields an empty list.
    """
    base = vault.VAULT / RECIPES_DIR
    if not base.exists():
        return []

    def is_recipe(note: Path) -> bool:
        if note.name.lower() == "readme.md":
            return False
        components = {part.lower() for part in note.relative_to(base).parts}
        return "_inbox" not in components

    return [note for note in sorted(base.rglob("*.md")) if is_recipe(note)]


def _title_of(path: Path, text: str) -> str:
    """Return the display title for a recipe note.

    Args:
        path: Location of the note; its stem is the fallback title.
        text: Full text of the note.

    Returns:
        The stripped value of the frontmatter ``title:`` line when it is
        present and non-empty, otherwise ``path.stem``.
    """
    m = _FM_RE.search(text)
    if m:
        # Use [ \t]* rather than \s*: \s also matches newlines, so a blank
        # ``title:`` line would otherwise swallow the line break and capture
        # the *next* frontmatter line (e.g. "source: ...") as the title.
        tm = re.search(r"^title:[ \t]*(.+)$", m.group(1), re.M)
        if tm and tm.group(1).strip():
            return tm.group(1).strip()
    return path.stem


def list_saved_recipes() -> list[tuple[str, str]]:
    """Return ``(title, locator)`` for every saved recipe.

    The locator is a vault-relative note path for structured recipes, or an http
    URL for legacy ``Recipes.md`` entries. Structured notes come first, in
    sorted path order, followed by the legacy links in file order.
    """
    out: list[tuple[str, str]] = []
    for p in _iter_recipe_files():
        # Notes are read as UTF-8 explicitly so the result does not depend on
        # the platform's default locale encoding.
        text = p.read_text(encoding="utf-8")
        # as_posix() keeps locators forward-slashed on every OS, matching the
        # "Recipes/Web/..." paths that import_web_recipe() returns.
        out.append((_title_of(p, text), p.relative_to(vault.VAULT).as_posix()))
    legacy = vault.read_text(LEGACY_FILE) or ""
    out.extend(_LINK_RE.findall(legacy))
    return out


def find_recipe(query: str) -> tuple[str, str] | None:
    """Look up a saved recipe whose title loosely matches ``query``.

    The first saved title containing the lower-cased, stripped query wins.
    Failing that, the title sharing the most ASCII-letter words with the query
    is returned (earliest entry on ties). Returns None when nothing is saved
    or no title shares a word with the query.
    """
    needle = query.lower().strip()
    candidates = list_saved_recipes()
    if not candidates:
        return None

    # Pass 1: plain substring match, first hit wins.
    if needle:
        for name, locator in candidates:
            if needle in name.lower():
                return (name, locator)

    # Pass 2: word overlap; max() keeps the earliest entry among equal scores.
    wanted = set(re.findall(r"[a-z]+", needle))

    def overlap(entry) -> int:
        return len(wanted & set(re.findall(r"[a-z]+", entry[0].lower())))

    top = max(candidates, key=overlap)
    if not overlap(top):
        return None
    return (top[0], top[1])


def _section(text: str, heading: str) -> list[str]:
    """Collect the lines between a ``## heading`` line and the next ``## `` line.

    The heading comparison ignores case and surrounding whitespace; only the
    first matching heading is used. Returns an empty list when the heading is
    not found.
    """
    wanted = f"## {heading}".lower()
    collected: list[str] = []
    inside = False
    for line in text.splitlines():
        if not inside:
            # Still searching for the heading line itself.
            inside = line.strip().lower() == wanted
            continue
        if line.lstrip().startswith("## "):
            break
        collected.append(line)
    return collected


def _parse_note(path: Path) -> dict:
    """Parse a structured recipe note into a recipe dict.

    Args:
        path: Location of the markdown note to read.

    Returns:
        A dict with ``name`` (see ``_title_of``), ``ingredients`` (text of each
        ``-``/``*`` bullet under ``## Ingredients``), ``steps`` (text of each
        ``N.``/``N)`` item under ``## Steps``) and ``url`` (the frontmatter
        ``url:`` value, or ``""`` when absent or blank).
    """
    # Read as UTF-8 explicitly rather than relying on the locale default.
    text = path.read_text(encoding="utf-8")
    url = ""
    fm = _FM_RE.search(text)
    if fm:
        # [ \t]* rather than \s*: \s matches newlines, so a blank ``url:``
        # line would otherwise capture the following frontmatter line
        # (e.g. "page: 12") as the URL.
        um = re.search(r"^url:[ \t]*(\S.*)$", fm.group(1), re.M)
        if um:
            url = um.group(1).strip()
    ingredients = [
        m.group(1).strip()
        for ln in _section(text, "Ingredients")
        if (m := re.match(r"\s*[-*]\s+(.+)", ln))
    ]
    steps = [
        m.group(1).strip()
        for ln in _section(text, "Steps")
        if (m := re.match(r"\s*\d+[.)]\s+(.+)", ln))
    ]
    return {
        "name": _title_of(path, text),
        "ingredients": ingredients,
        "steps": steps,
        "url": url,
    }


def load_recipe(locator: str) -> dict | None:
    """Resolve a locator (vault note path or http URL) to a recipe dict.

    Locators starting with ``http`` are fetched from the web. Anything else is
    treated as a vault-relative note path; a missing note yields None. A note
    with no steps but an http ``url`` is re-fetched from that URL, keeping the
    parsed note as the result if the fetch returns nothing.
    """
    if locator.startswith("http"):
        return _fetch_web(locator)

    note_path = vault.VAULT / locator
    if not note_path.exists():
        return None

    parsed = _parse_note(note_path)
    if parsed["steps"] or not parsed.get("url", "").startswith("http"):
        return parsed
    # No transcribed steps, but the note links out: prefer the web copy.
    return _fetch_web(parsed["url"]) or parsed


def _walk_instructions(instr) -> list[str]:
    """Flatten a schema.org ``recipeInstructions`` value into step strings.

    Strings become one step each (tags removed, whitespace trimmed), lists are
    flattened in order, and dicts either recurse into ``itemListElement`` or
    contribute their ``text`` (falling back to ``name``). Empty results and any
    other value types contribute nothing.
    """

    def plain(raw) -> str:
        # Drop anything that looks like an HTML tag, then trim.
        return re.sub(r"<[^>]+>", "", raw).strip()

    if isinstance(instr, str):
        cleaned = plain(instr)
        return [cleaned] if cleaned else []

    if isinstance(instr, list):
        return [step for entry in instr for step in _walk_instructions(entry)]

    if isinstance(instr, dict):
        if "itemListElement" in instr or instr.get("@type") == "HowToSection":
            return _walk_instructions(instr.get("itemListElement", []))
        cleaned = plain(instr.get("text") or instr.get("name") or "")
        return [cleaned] if cleaned else []

    return []


def _fetch_web(url: str) -> dict | None:
    """Fetch a recipe web page and parse its JSON-LD into a recipe dict.

    Args:
        url: Address of the recipe page.

    Returns:
        None when the response status is not 200 or no JSON-LD node typed
        ``Recipe`` is found. Otherwise a dict with the keys ``name``,
        ``ingredients``, ``steps``, ``url``, ``author``, ``servings``,
        ``total_time`` and ``source`` (the URL's host without a leading
        ``www.``), built from the first such node.
    """
    # Browser impersonation: some recipe sites bot-block plain HTTP clients.
    client = primp.Client(impersonate="chrome", impersonate_os="macos", timeout=20)
    resp = client.get(url)
    if resp.status_code != 200:
        return None
    for block in _LDJSON_RE.findall(resp.text):
        try:
            data = json.loads(block)
        except json.JSONDecodeError:
            continue
        # The payload may be a single node, a list of nodes, or a wrapper with
        # an "@graph" list. Anything else (a bare string/number) has no nodes
        # and falls through the isinstance(dict) filter below.
        items = data.get("@graph", [data]) if isinstance(data, dict) else data
        if not isinstance(items, list):
            items = [items]
        for it in items:
            if not isinstance(it, dict):
                continue
            types = it.get("@type", [])
            types = types if isinstance(types, list) else [types]
            if "Recipe" not in types:
                continue
            # recipeIngredient may be missing, null, or a lone string; iterating
            # a bare string would yield one "ingredient" per character.
            raw_ingredients = it.get("recipeIngredient") or []
            if not isinstance(raw_ingredients, list):
                raw_ingredients = [raw_ingredients]
            return {
                "name": it.get("name") or "",
                "ingredients": [
                    i.strip() for i in raw_ingredients if isinstance(i, str) and i.strip()
                ],
                "steps": _walk_instructions(it.get("recipeInstructions", [])),
                "url": url,
                "author": _author_name(it.get("author")),
                "servings": _yield_str(it.get("recipeYield")),
                "total_time": _iso_duration_to_human(
                    it.get("totalTime") or it.get("cookTime") or it.get("prepTime")
                ),
                # removeprefix, not replace: only a leading "www." is noise;
                # replace() would also mangle hosts containing "www." elsewhere.
                "source": urlparse(url).netloc.removeprefix("www."),
            }
    return None


def _author_name(a) -> str:
    """Extract an author name from a schema.org ``author`` value.

    Accepts a dict (its ``name``), a plain string, or a list (first element,
    unwrapped repeatedly). Anything else, including an empty list, gives ``""``.
    """
    # Peel nested lists down to their first element.
    while isinstance(a, list) and a:
        a = a[0]
    if isinstance(a, dict):
        return str(a.get("name", "")).strip()
    if isinstance(a, str):
        return a.strip()
    return ""


def _yield_str(y) -> str:
    """Render a schema.org ``recipeYield`` value as a stripped string.

    A list contributes its last element (presumably the most descriptive
    form, e.g. ``["4", "4 servings"]`` — confirm against real pages). A
    missing value (``None``) or an empty list gives ``""``.
    """
    if isinstance(y, list):
        y = y[-1] if y else ""
    # str(None) would be the literal text "None", which then ends up in the
    # note's ``servings:`` frontmatter; treat a missing yield as empty.
    if y is None:
        return ""
    return str(y).strip()


def _iso_duration_to_human(d) -> str:
    """Convert an ISO-8601 duration like 'PT1H30M' to '1 hour 30 minutes'.

    Day, hour and minute components are rendered; seconds are ignored and
    minutes are not folded into hours ('PT90M' stays '90 minutes'). Returns
    ``""`` for non-string input, for strings with no recognisable duration,
    and for durations whose rendered components are all zero.
    """
    if not isinstance(d, str):
        return ""
    # An optional day component may precede the "T" ('P0DT1H30M', 'P1DT2H');
    # such values contain no literal "PT", so they must be matched explicitly.
    # The second pattern covers day-only durations such as 'P1D'.
    m = re.search(r"P(?:(\d+)D)?T(?:(\d+)H)?(?:(\d+)M)?", d) or re.search(r"P(\d+)D", d)
    if not m:
        return ""
    # Pad so both patterns unpack to (days, hours, minutes).
    days, hours, mins = (int(g or 0) for g in (m.groups() + (None, None))[:3])
    parts = []
    for value, unit in ((days, "day"), (hours, "hour"), (mins, "minute")):
        if value:
            parts.append(f"{value} {unit}" + ("s" if value > 1 else ""))
    return " ".join(parts)


def _safe_filename(title: str) -> str:
    """Turn a recipe title into a file-name stem.

    Each of the characters ``/ \\ : * ? " < > |`` is replaced by a space,
    the result is trimmed, whitespace runs collapse to one space, and the
    stem is cut to 120 characters. An empty result becomes ``Untitled Recipe``.
    """
    forbidden = str.maketrans({ch: " " for ch in '/\\:*?"<>|'})
    trimmed = title.translate(forbidden).strip()
    collapsed = re.sub(r"\s+", " ", trimmed)
    if not collapsed:
        collapsed = "Untitled Recipe"
    return collapsed[:120]


def import_web_recipe(url: str) -> str | None:
    """Fetch a recipe URL and save it as a structured note in Recipes/Web/.

    The note's file name is derived from the recipe title. If a note already
    exists at that path it is left untouched and its path is returned.

    Args:
        url: Address of the recipe page to import.

    Returns:
        The vault-relative path of the note (existing or newly written), or
        None if no recipe could be extracted from the page.
    """
    data = _fetch_web(url)
    if not data or not (data.get("ingredients") or data.get("steps")):
        return None

    def one_line(value) -> str:
        # Frontmatter fields, bullets and numbered steps are read back one
        # line at a time, so scraped text containing line breaks must be
        # folded onto a single line or the tail would be lost on re-read.
        return " ".join(str(value).split())

    title = one_line(data.get("name") or "") or "Untitled Recipe"
    rel = f"{WEB_DIR}/{_safe_filename(title)}.md"
    note = vault.VAULT / rel
    if note.exists():
        return rel  # already imported; don't clobber the user's edits
    note.parent.mkdir(parents=True, exist_ok=True)

    ingredients = [text for i in data.get("ingredients") or [] if (text := one_line(i))]
    steps = [text for s in data.get("steps") or [] if (text := one_line(s))]

    fm = [
        "---",
        "type: recipe",
        f"title: {title}",
        f"source: {data.get('source', '')}",
        "cookbook:",
        f"author: {one_line(data.get('author', ''))}",
        f"url: {url}",
        "page:",
        f"servings: {one_line(data.get('servings', ''))}",
        f"total_time: {data.get('total_time', '')}",
        "tags: [recipe/]",
        "rating:",
        "last_made:",
        "---",
        "",
    ]
    body = ["## Ingredients"]
    body += [f"- {i}" for i in ingredients]
    body += ["", "## Steps"]
    body += [f"{n}. {s}" for n, s in enumerate(steps, 1)]
    body += ["", "## Notes", f"<!-- Imported from {url} on {date.today():%Y.%m.%d} -->", ""]
    # Write UTF-8 explicitly so the note does not depend on the locale default.
    note.write_text("\n".join(fm + body), encoding="utf-8")
    return rel


if __name__ == "__main__":
    # Command-line entry point: import one recipe URL into the vault.
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("usage: python recipes.py <recipe-url>")
        sys.exit(1)

    saved_path = import_web_recipe(cli_args[0])
    if not saved_path:
        print("Could not extract a recipe from that URL.")
        sys.exit(2)
    print(f"Saved: {saved_path}")
