#!/usr/bin/env python3

from __future__ import annotations

import argparse
import base64
import csv
import html
import json
import os
import re
import shutil
import subprocess
import sys
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any


# Repository root (this script is expected to live in <root>/scripts/).
PROJECT_ROOT = Path(__file__).resolve().parents[1]
# All generated audit artifacts land under output/spreadsheet/.
DEFAULT_OUTPUT_DIR = PROJECT_ROOT / "output" / "spreadsheet"
DEFAULT_QUEUE_DIR = DEFAULT_OUTPUT_DIR / "translation_queue"
DEFAULT_SNAPSHOT_PATH = DEFAULT_OUTPUT_DIR / "content_items_live_snapshot.json"
# Per-issue CSV reports plus a plain-text summary written by the audit step.
WRONG_LANGUAGE_PATH = DEFAULT_OUTPUT_DIR / "wrong_language_rows.csv"
MIXED_PLACEHOLDER_PATH = DEFAULT_OUTPUT_DIR / "mixed_placeholder_rows.csv"
MISSING_LOCALES_PATH = DEFAULT_OUTPUT_DIR / "missing_locales_by_group.csv"
SUMMARY_PATH = DEFAULT_OUTPUT_DIR / "sql_audit_summary.txt"
MANIFEST_PATH = DEFAULT_QUEUE_DIR / "manifest.csv"
README_PATH = DEFAULT_QUEUE_DIR / "README.md"

# Locales every (type, content_key) group is expected to cover.
SUPPORTED_LOCALES = ["en", "fr", "es", "ru", "de", "ka", "it", "ar", "he", "pl", "tr"]
# Short headline fields: a localized shell means at least one of these differs
# from the English source (see localized_shell_fields).
SHELL_FIELDS = ["title", "subtitle", "question"]
TRANSLATABLE_FIELDS = [
    "title",
    "subtitle",
    "excerpt",
    "content",
    "meta_title",
    "meta_description",
]
# Long-form fields checked for verbatim English placeholder copies.
CONTENT_FIELDS = ["excerpt", "content", "meta_title", "meta_description"]

# Remote Laravel app reached over SSH; all three are env-overridable.
# NOTE(review): hardcoded default host/IP and key path — consider requiring
# the environment variables instead of shipping credentials in source.
REMOTE_HOST = os.environ.get("BENT_LIVE_SSH_HOST", "ontripge@78.140.140.243")
REMOTE_KEY = os.environ.get("BENT_LIVE_SSH_KEY", str(Path.home() / ".ssh" / "macmini"))
REMOTE_APP_DIR = os.environ.get("BENT_LIVE_APP_DIR", "/home/ontripge/domains/api.hub.bent.ge")
SYNC_SCRIPT = PROJECT_ROOT / "scripts" / "sync-site-content.sh"

# Dialect used by every CSV reader/writer in this script.
CSV_DIALECT = csv.excel

# Locale-specific stopword sets used for Latin-script language detection
# (detect_latin_language counts how many of these words appear in a text).
# Only Latin-alphabet locales are listed; Cyrillic/Georgian/Arabic/Hebrew
# are detected by Unicode script range via SCRIPT_REGEX below.
STOPWORDS = {
    "en": {
        "the",
        "and",
        "with",
        "for",
        "from",
        "your",
        "into",
        "this",
        "that",
        "car",
        "rental",
        "georgia",
        "drive",
        "road",
        "trip",
    },
    "fr": {
        "avec",
        "pour",
        "dans",
        "vous",
        "votre",
        "location",
        "voiture",
        "géorgie",
        "route",
        "conduite",
        "les",
        "des",
        "une",
        "est",
    },
    "es": {
        "para",
        "con",
        "coche",
        "alquiler",
        "georgia",
        "carretera",
        "ruta",
        "usted",
        "puede",
        "este",
        "esta",
        "los",
        "las",
        "que",
    },
    "de": {
        "mit",
        "für",
        "auto",
        "mietwagen",
        "georgien",
        "straße",
        "fahrt",
        "reise",
        "und",
        "der",
        "die",
        "das",
        "eine",
        "einen",
    },
    "it": {
        "con",
        "per",
        "auto",
        "noleggio",
        "georgia",
        "strada",
        "viaggio",
        "questa",
        "questo",
        "puoi",
        "della",
        "delle",
        "una",
        "uno",
    },
    "pl": {
        "wynajem",
        "samochodu",
        "gruzji",
        "droga",
        "podróż",
        "trasa",
        "samochód",
        "możesz",
        "oraz",
        "dla",
        "jest",
        "nie",
        "przez",
        "na",
    },
    "tr": {
        "araç",
        "kiralama",
        "gürcistan",
        "yol",
        "seyahat",
        "rota",
        "için",
        "ile",
        "bu",
        "bir",
        "ve",
        "olarak",
        "olan",
        "içinde",
    },
}

# Unicode script ranges for non-Latin locales. detect_script_language treats
# three or more matches in a text bundle as a positive detection; dict order
# sets detection priority (ru before ka before ar before he).
SCRIPT_REGEX = {
    "ru": re.compile(r"[А-Яа-яЁё]"),
    "ka": re.compile(r"[\u10A0-\u10FF]"),
    "ar": re.compile(r"[\u0600-\u06FF]"),
    "he": re.compile(r"[\u0590-\u05FF]"),
}


def now_iso() -> str:
    """Current UTC time as an ISO-8601 string, truncated to whole seconds."""
    stamp = datetime.now(timezone.utc)
    return stamp.replace(microsecond=0).isoformat()


def ssh_command() -> list[str]:
    """Build the ssh argv that runs `php` inside the remote app directory."""
    remote_cmd = f"cd {shell_quote(REMOTE_APP_DIR)} && php"
    return [
        "ssh",
        "-o", "ClearAllForwardings=yes",
        "-i", REMOTE_KEY,
        "-o", "IdentitiesOnly=yes",
        REMOTE_HOST,
        remote_cmd,
    ]


def shell_quote(value: str) -> str:
    """Wrap *value* in single quotes for POSIX shells, escaping embedded quotes."""
    escaped = value.replace("'", "'\"'\"'")
    return f"'{escaped}'"


def run_remote_php(php_code: str) -> str:
    """Pipe *php_code* into `php` on the remote host and return its stdout.

    Raises RuntimeError (with captured stdout/stderr) on a non-zero exit.
    """
    result = subprocess.run(
        ssh_command(),
        input=php_code,
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode == 0:
        return result.stdout
    raise RuntimeError(
        "Remote PHP execution failed.\n"
        f"stdout:\n{result.stdout}\n"
        f"stderr:\n{result.stderr}"
    )


def fetch_live_rows() -> list[dict[str, Any]]:
    """Dump every `content_items` row from the live database.

    Boots the remote Laravel app over SSH (see run_remote_php) and selects
    the whole table ordered by (type, content_key, locale); returns the
    decoded list of row dicts from the JSON payload.
    """
    php_code = """<?php
require __DIR__ . '/vendor/autoload.php';
$app = require __DIR__ . '/bootstrap/app.php';
$kernel = $app->make(Illuminate\\Contracts\\Console\\Kernel::class);
$kernel->bootstrap();

$rows = Illuminate\\Support\\Facades\\DB::table('content_items')
    ->orderBy('type')
    ->orderBy('content_key')
    ->orderBy('locale')
    ->get()
    ->map(fn ($row) => (array) $row)
    ->all();

echo json_encode(
    [
        'generated_at' => gmdate('c'),
        'rows' => $rows,
    ],
    JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
);
"""
    payload = json.loads(run_remote_php(php_code))
    return payload["rows"]


def load_snapshot(snapshot_path: Path) -> dict[str, Any]:
    """Read a previously saved snapshot JSON file from disk."""
    raw = snapshot_path.read_text(encoding="utf-8")
    return json.loads(raw)


def save_snapshot(rows: list[dict[str, Any]], snapshot_path: Path) -> dict[str, Any]:
    """Persist *rows* (plus timestamp/count metadata) as pretty JSON.

    Parent directories are created as needed; the written payload is returned.
    """
    payload = {
        "generated_at": now_iso(),
        "row_count": len(rows),
        "rows": rows,
    }
    snapshot_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    snapshot_path.write_text(serialized, encoding="utf-8")
    return payload


def normalize_text(value: Any) -> str:
    """Flatten *value* to comparable plain text.

    Dicts/lists are serialized to sorted JSON first; HTML entities are
    unescaped, tags stripped, line endings normalized to \\n, horizontal
    whitespace collapsed, and 3+ blank lines squeezed to one blank line.
    """
    if value is None:
        return ""
    if isinstance(value, (dict, list)):
        value = json.dumps(value, ensure_ascii=False, sort_keys=True)
    text = html.unescape(str(value))
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    for pattern, replacement in (
        (r"<[^>]+>", " "),   # strip markup
        (r"[ \t]+", " "),    # collapse runs of spaces/tabs
        (r"\n{3,}", "\n\n"),  # squeeze excessive blank lines
    ):
        text = re.sub(pattern, replacement, text)
    return text.strip()


def csv_cell(value: Any) -> str:
    """Render *value* as a string for a CSV cell (None becomes "")."""
    if isinstance(value, (dict, list)):
        return json.dumps(value, ensure_ascii=False)
    return "" if value is None else str(value)


def row_title(row: dict[str, Any] | None) -> str:
    """Best display title for a row: `title`, falling back to `question`."""
    if not row:
        return ""
    value = row.get("title") or row.get("question") or ""
    return csv_cell(value)


def row_field(row: dict[str, Any] | None, field: str) -> str:
    """Fetch *field* from *row* as a string; `title` uses the question fallback."""
    if not row:
        return ""
    return row_title(row) if field == "title" else csv_cell(row.get(field, ""))


def group_rows(rows: list[dict[str, Any]]) -> dict[tuple[str, str], dict[str, dict[str, Any]]]:
    """Index rows as {(type, content_key): {locale: row}}."""
    grouped: dict[tuple[str, str], dict[str, dict[str, Any]]] = defaultdict(dict)
    for row in rows:
        group_key = (csv_cell(row.get("type")), csv_cell(row.get("content_key")))
        locale = csv_cell(row.get("locale"))
        grouped[group_key][locale] = row
    return grouped


def detect_script_language(text: str) -> str | None:
    """Locale whose script occurs at least 3 times in *text*, else None.

    Dict order of SCRIPT_REGEX determines priority when scripts are mixed.
    """
    for locale, pattern in SCRIPT_REGEX.items():
        hits = pattern.findall(text)
        if len(hits) >= 3:
            return locale
    return None


def tokenize_latin(text: str) -> list[str]:
    lowered = text.lower()
    return re.findall(r"[a-zà-ÿąćęłńóśźżğüşöçıîâêôûëïü]+", lowered)


def detect_latin_language(text: str) -> tuple[str | None, dict[str, int]]:
    """Guess a Latin-script locale for *text* by counting stopword hits.

    Returns (locale, scores). The locale is None unless the top-scoring
    locale has at least 3 hits and leads the runner-up by at least 2.
    """
    tokens = tokenize_latin(text)
    scores = dict.fromkeys(STOPWORDS, 0)
    if not tokens:
        return None, scores
    frequencies = Counter(tokens)
    for locale, words in STOPWORDS.items():
        scores[locale] = sum(frequencies[word] for word in words)
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    winner, top_score = ranked[0]
    runner_up = ranked[1][1] if len(ranked) > 1 else 0
    if top_score >= 3 and top_score - runner_up >= 2:
        return winner, scores
    return None, scores


def detect_language(text: str) -> tuple[str | None, str]:
    """Detect the dominant language of *text*.

    Script-range detection wins over stopword scoring; returns
    (locale_or_None, diagnostic_note).
    """
    by_script = detect_script_language(text)
    if by_script:
        return by_script, f"script:{by_script}"
    by_stopwords, scores = detect_latin_language(text)
    if by_stopwords is None:
        return None, "unknown"
    detail = ",".join(f"{locale}={score}" for locale, score in sorted(scores.items()))
    return by_stopwords, "latin:" + detail


def build_text_bundle(row: dict[str, Any]) -> str:
    """Join the row's translatable text fields into one newline-separated blob.

    The title always leads (possibly empty); other fields are included only
    when they normalize to non-empty text.
    """
    parts = [row_title(row)]
    parts.extend(
        cleaned
        for field in ("subtitle", "excerpt", "content", "meta_title", "meta_description")
        if (cleaned := normalize_text(row.get(field)))
    )
    return "\n".join(parts)


def placeholder_fields(row: dict[str, Any], source_row: dict[str, Any]) -> list[str]:
    """Content fields whose text is a verbatim copy of the English source."""
    copied: list[str] = []
    for field in CONTENT_FIELDS:
        english = normalize_text(row_field(source_row, field))
        localized = normalize_text(row_field(row, field))
        # Empty source fields are ignored — nothing to have been copied.
        if english and english == localized:
            copied.append(field)
    return copied


def localized_shell_fields(row: dict[str, Any], source_row: dict[str, Any]) -> list[str]:
    """Shell fields (title/subtitle/question) that differ from the English source."""
    return [
        field
        for field in SHELL_FIELDS
        if (localized := normalize_text(row_field(row, field)))
        and localized != normalize_text(row_field(source_row, field))
    ]


def run_audit(rows: list[dict[str, Any]]) -> dict[str, Any]:
    """Classify live content rows into three translation-issue buckets.

    Returns a dict with keys:
      grouped           -- {(type, content_key): {locale: row}} index of *rows*
      wrong_language    -- non-English rows whose text detects as a third language
      mixed_placeholder -- rows with a localized shell but English body fields
      missing_locales   -- groups lacking one or more SUPPORTED_LOCALES
    """
    grouped = group_rows(rows)
    wrong_language: list[dict[str, Any]] = []
    mixed_placeholder: list[dict[str, Any]] = []
    missing_locales: list[dict[str, Any]] = []

    for (content_type, content_key), locales_map in sorted(grouped.items()):
        # The English row is the canonical source for all comparisons below.
        source_row = locales_map.get("en")
        present_locales = sorted(locales_map.keys())
        missing = [locale for locale in SUPPORTED_LOCALES if locale not in locales_map]
        if missing:
            missing_locales.append(
                {
                    "type": content_type,
                    "content_key": content_key,
                    "present_count": len(present_locales),
                    "missing_count": len(missing),
                    "present_locales": ",".join(present_locales),
                    "missing_locales": ",".join(missing),
                    "source_row_id": csv_cell(source_row.get("id")) if source_row else "",
                    "source_available": "yes" if source_row else "no",
                }
            )

        for locale, row in sorted(locales_map.items()):
            if locale == "en":
                continue
            # Detecting the row's own locale or English is acceptable;
            # any other detection flags the row as wrong-language.
            text_bundle = build_text_bundle(row)
            detected_locale, detection_note = detect_language(text_bundle)
            if detected_locale and detected_locale not in {locale, "en"}:
                wrong_language.append(
                    {
                        "id": csv_cell(row.get("id")),
                        "type": content_type,
                        "content_key": content_key,
                        "locale": locale,
                        "detected_language": detected_locale,
                        "detected_field": "bundle",
                        "detected_sample": normalize_text(text_bundle)[:220],
                        "issue_note": detection_note,
                    }
                )
                continue

            # Placeholder checks need the English source row to compare against.
            if not source_row:
                continue

            # Localized shell + verbatim English body = half-finished translation.
            shell_changes = localized_shell_fields(row, source_row)
            placeholders = placeholder_fields(row, source_row)
            if shell_changes and placeholders:
                mixed_placeholder.append(
                    {
                        "id": csv_cell(row.get("id")),
                        "type": content_type,
                        "content_key": content_key,
                        "locale": locale,
                        "source_row_id": csv_cell(source_row.get("id")),
                        "localized_shell_fields": ",".join(shell_changes),
                        "placeholder_fields": ",".join(placeholders),
                        "issue_note": "localized shell with english source fields still copied",
                    }
                )

    return {
        "grouped": grouped,
        "wrong_language": wrong_language,
        "mixed_placeholder": mixed_placeholder,
        "missing_locales": missing_locales,
    }


def write_csv(path: Path, fieldnames: list[str], rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as CSV with the given header, creating parents.

    Values are coerced through csv_cell; missing fields become "".
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(
            handle,
            fieldnames=fieldnames,
            dialect=CSV_DIALECT,
            lineterminator="\n",
        )
        writer.writeheader()
        writer.writerows(
            {field: csv_cell(row.get(field, "")) for field in fieldnames}
            for row in rows
        )


def write_summary(path: Path, rows: list[dict[str, Any]], audit: dict[str, Any]) -> None:
    """Render a human-readable plain-text summary of the audit to *path*."""
    wrong_rows = audit["wrong_language"]
    missing_rows = audit["missing_locales"]
    mixed_rows = audit["mixed_placeholder"]

    lines = [
        f"generated_at: {now_iso()}",
        f"total_rows: {len(rows)}",
        f"groups: {len(audit['grouped'])}",
        f"true_wrong_language_rows: {len(wrong_rows)}",
        f"mixed_placeholder_rows: {len(mixed_rows)}",
        f"groups_with_missing_locales: {len(missing_rows)}",
        "",
        "wrong_language_rows:",
    ]
    if wrong_rows:
        lines.extend(
            f"- id={row['id']} type={row['type']} key={row['content_key']} "
            f"locale={row['locale']} detected={row['detected_language']}"
            for row in wrong_rows
        )
    else:
        lines.append("- none")

    lines.extend(["", "groups_with_missing_locales:"])
    if missing_rows:
        lines.extend(
            f"- {row['type']} / {row['content_key']} -> missing: {row['missing_locales']}"
            for row in missing_rows
        )
    else:
        lines.append("- none")

    lines.extend(["", "mixed_placeholder_breakdown:"])
    type_counts = Counter(row["type"] for row in mixed_rows)
    if type_counts:
        lines.extend(
            f"- {content_type}: {count}"
            for content_type, count in sorted(type_counts.items())
        )
    else:
        lines.append("- none")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")


def audit_to_disk(rows: list[dict[str, Any]], output_dir: Path, snapshot_path: Path) -> dict[str, Any]:
    """Snapshot *rows*, run the audit, and write all report files.

    Produces the three issue CSVs plus the text summary under *output_dir*
    and returns the audit dict.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    save_snapshot(rows, snapshot_path)
    audit = run_audit(rows)

    # (file name, audit bucket, CSV columns) for each report.
    reports = [
        (
            WRONG_LANGUAGE_PATH.name,
            "wrong_language",
            [
                "id",
                "type",
                "content_key",
                "locale",
                "detected_language",
                "detected_field",
                "detected_sample",
                "issue_note",
            ],
        ),
        (
            MIXED_PLACEHOLDER_PATH.name,
            "mixed_placeholder",
            [
                "id",
                "type",
                "content_key",
                "locale",
                "source_row_id",
                "localized_shell_fields",
                "placeholder_fields",
                "issue_note",
            ],
        ),
        (
            MISSING_LOCALES_PATH.name,
            "missing_locales",
            [
                "type",
                "content_key",
                "present_count",
                "missing_count",
                "present_locales",
                "missing_locales",
                "source_row_id",
                "source_available",
            ],
        ),
    ]
    for file_name, bucket, columns in reports:
        write_csv(output_dir / file_name, columns, audit[bucket])

    write_summary(output_dir / SUMMARY_PATH.name, rows, audit)
    return audit


def translation_row(
    issue_type: str,
    content_type: str,
    content_key: str,
    target_locale: str,
    source_row: dict[str, Any] | None,
    target_row: dict[str, Any] | None,
    issue_note: str = "",
    det_title: str = "",
    det_excerpt: str = "",
    det_content: str = "",
) -> dict[str, Any]:
    """Assemble one translation-queue CSV row.

    Columns: identifiers, source_* (English reference), current_* (live
    target-locale values), empty target_* for the translator to fill, then
    detection details and the issue note. Status/visibility fall back to
    the source row when the target row lacks them.
    """
    text_fields = ("title", "subtitle", "excerpt", "content", "meta_title", "meta_description")
    record: dict[str, Any] = {
        "issue_type": issue_type,
        "type": content_type,
        "content_key": content_key,
        "target_locale": target_locale,
        "target_row_id": row_field(target_row, "id"),
        "source_locale": "en",
        "source_row_id": row_field(source_row, "id"),
        "status_current": row_field(target_row, "status") or row_field(source_row, "status"),
        "visibility_current": row_field(target_row, "visibility") or row_field(source_row, "visibility"),
    }
    for field in text_fields:
        record[f"source_{field}"] = row_field(source_row, field)
    for field in text_fields:
        record[f"current_{field}"] = row_field(target_row, field)
    for field in text_fields:
        record[f"target_{field}"] = ""
    record["det_title"] = det_title
    record["det_excerpt"] = det_excerpt
    record["det_content"] = det_content
    record["issue_note"] = issue_note
    return record


def queue_fieldnames() -> list[str]:
    """Column order for every translation-queue CSV (matches translation_row)."""
    text_fields = ("title", "subtitle", "excerpt", "content", "meta_title", "meta_description")
    header = [
        "issue_type",
        "type",
        "content_key",
        "target_locale",
        "target_row_id",
        "source_locale",
        "source_row_id",
        "status_current",
        "visibility_current",
    ]
    for prefix in ("source", "current", "target"):
        header.extend(f"{prefix}_{field}" for field in text_fields)
    header.extend(["det_title", "det_excerpt", "det_content", "issue_note"])
    return header


def clear_queue_dir(queue_dir: Path) -> None:
    """Delete generated queue artifacts and ensure *queue_dir* exists.

    Removes the three per-issue subdirectories plus manifest.csv/README.md;
    any other files in the directory are left untouched.
    """
    if queue_dir.exists():
        for subdir in ("wrong-language", "mixed-placeholder", "missing-locales"):
            shutil.rmtree(queue_dir / subdir, ignore_errors=True)
        for file_name in ("manifest.csv", "README.md"):
            stale = queue_dir / file_name
            if stale.exists():
                stale.unlink()
    queue_dir.mkdir(parents=True, exist_ok=True)


def write_readme(queue_dir: Path) -> None:
    """Write the operator-facing README into *queue_dir*.

    The body is Russian-language runtime content describing the queue
    layout and the import workflow; it is emitted verbatim.
    """
    text = """# Translation Queue

Этот каталог генерируется автоматически из live `content_items`.

## Структура
- `wrong-language/` — строка есть, но внутри контент лежит не на том языке.
- `mixed-placeholder/` — заголовок уже локализован, а `content/excerpt/meta*` еще английские.
- `missing-locales/` — локали нет вообще, нужно создать новую строку.

## Как заполнять
- Менять только `target_*`.
- `source_*` — канонический английский source.
- `current_*` — текущий live snapshot проблемной строки.
- `target_row_id` пустой только у `missing-locales`.
- Для FAQ `target_title` автоматически зеркалится в `question` при импорте.
- Существующие `slug/status/visibility` не перетираются вслепую.
- Для новых locale строк slug выводится автоматически из английского source пути с locale prefix.

## Импорт
- Импортер берет только строки, где заполнено хотя бы одно `target_*`.
- Сначала закрывай `wrong-language`, потом `mixed-placeholder`, потом `missing-locales`.
- После каждого батча нужно делать re-audit.
"""
    (queue_dir / "README.md").write_text(text, encoding="utf-8")


def build_queue(snapshot: dict[str, Any], audit: dict[str, Any], queue_dir: Path) -> list[dict[str, Any]]:
    """Materialize the translation queue on disk from a snapshot and audit.

    Writes one CSV per (issue, type, content_key) group under
    wrong-language/, mixed-placeholder/ and missing-locales/, plus
    manifest.csv and README.md; returns the manifest rows.
    """
    clear_queue_dir(queue_dir)
    write_readme(queue_dir)

    grouped = group_rows(snapshot["rows"])
    manifest_rows: list[dict[str, Any]] = []

    # Re-key each audit bucket by (type, content_key) so one CSV is emitted
    # per content group rather than per problem row.
    wrong_by_group: dict[tuple[str, str], list[dict[str, Any]]] = defaultdict(list)
    for row in audit["wrong_language"]:
        wrong_by_group[(row["type"], row["content_key"])].append(row)

    mixed_by_group: dict[tuple[str, str], list[dict[str, Any]]] = defaultdict(list)
    for row in audit["mixed_placeholder"]:
        mixed_by_group[(row["type"], row["content_key"])].append(row)

    missing_by_group: dict[tuple[str, str], dict[str, Any]] = {
        (row["type"], row["content_key"]): row for row in audit["missing_locales"]
    }

    def emit_group(
        issue_type: str,
        content_type: str,
        content_key: str,
        rows: list[dict[str, Any]],
        blocked_note: str = "",
    ) -> None:
        # Write one group CSV and its manifest entry. Groups without an
        # English source cannot be translated: they are recorded as
        # "blocked" in the manifest and no CSV file is written.
        source_row = grouped.get((content_type, content_key), {}).get("en")
        rel_path = Path(issue_type.replace("_", "-")) / f"{content_type}__{content_key}.csv"
        if not source_row:
            manifest_rows.append(
                {
                    "issue_type": issue_type.replace("_", "-"),
                    "type": content_type,
                    "content_key": content_key,
                    "file_path": csv_cell(rel_path),
                    "row_count": len(rows),
                    "target_locales": ",".join(sorted(row["target_locale"] for row in rows)),
                    "source_available": "no",
                    "status": "blocked",
                    "issue_note": blocked_note or "missing english source row",
                }
            )
            return

        abs_path = queue_dir / rel_path
        write_csv(abs_path, queue_fieldnames(), rows)
        manifest_rows.append(
            {
                "issue_type": issue_type.replace("_", "-"),
                "type": content_type,
                "content_key": content_key,
                "file_path": csv_cell(rel_path),
                "row_count": len(rows),
                "target_locales": ",".join(sorted(row["target_locale"] for row in rows)),
                "source_available": "yes",
                "status": "ready",
                "issue_note": blocked_note,
            }
        )

    # Wrong-language groups: one queue row per flagged locale.
    for (content_type, content_key), issue_rows in sorted(wrong_by_group.items()):
        source_row = grouped.get((content_type, content_key), {}).get("en")
        pack_rows = []
        for issue in sorted(issue_rows, key=lambda item: item["locale"]):
            target_row = grouped[(content_type, content_key)].get(issue["locale"])
            pack_rows.append(
                translation_row(
                    issue_type="wrong_language",
                    content_type=content_type,
                    content_key=content_key,
                    target_locale=issue["locale"],
                    source_row=source_row,
                    target_row=target_row,
                    issue_note=issue["issue_note"],
                    det_content=f"detected {issue['detected_language']}",
                )
            )
        emit_group("wrong_language", content_type, content_key, pack_rows)

    # Mixed-placeholder groups: det_* columns carry which fields were
    # localized vs. still English copies.
    for (content_type, content_key), issue_rows in sorted(mixed_by_group.items()):
        source_row = grouped.get((content_type, content_key), {}).get("en")
        pack_rows = []
        for issue in sorted(issue_rows, key=lambda item: item["locale"]):
            target_row = grouped[(content_type, content_key)].get(issue["locale"])
            pack_rows.append(
                translation_row(
                    issue_type="mixed_placeholder",
                    content_type=content_type,
                    content_key=content_key,
                    target_locale=issue["locale"],
                    source_row=source_row,
                    target_row=target_row,
                    issue_note=issue["issue_note"],
                    det_title=issue["localized_shell_fields"],
                    det_excerpt=issue["placeholder_fields"],
                )
            )
        emit_group("mixed_placeholder", content_type, content_key, pack_rows)

    # Missing-locale groups: one queue row per absent locale, no target row.
    for (content_type, content_key), issue in sorted(missing_by_group.items()):
        source_row = grouped.get((content_type, content_key), {}).get("en")
        pack_rows = []
        missing_locales = [locale for locale in csv_cell(issue["missing_locales"]).split(",") if locale]
        for locale in missing_locales:
            pack_rows.append(
                translation_row(
                    issue_type="missing_locales",
                    content_type=content_type,
                    content_key=content_key,
                    target_locale=locale,
                    source_row=source_row,
                    target_row=None,
                    issue_note="missing locale row in live source",
                )
            )
        emit_group("missing_locales", content_type, content_key, pack_rows)

    write_csv(
        queue_dir / "manifest.csv",
        [
            "issue_type",
            "type",
            "content_key",
            "file_path",
            "row_count",
            "target_locales",
            "source_available",
            "status",
            "issue_note",
        ],
        manifest_rows,
    )
    return manifest_rows


def has_target_payload(row: dict[str, Any]) -> bool:
    """True if the translator filled in at least one target_* column."""
    target_columns = (
        "target_title",
        "target_subtitle",
        "target_excerpt",
        "target_content",
        "target_meta_title",
        "target_meta_description",
    )
    return any(normalize_text(row.get(column)) for column in target_columns)


def localize_slug(source_slug: str, target_locale: str) -> str | None:
    """Derive a locale-prefixed slug from an English source slug.

    Any existing supported-locale prefix is stripped first; English keeps
    the bare path, other locales get a "/{locale}" prefix. Returns None
    when the source slug normalizes to empty.
    """
    cleaned = normalize_text(source_slug)
    if not cleaned:
        return None
    alternatives = "|".join(re.escape(locale) for locale in SUPPORTED_LOCALES)
    prefixed = re.match(rf"^/({alternatives})(/.*)?$", cleaned)
    if prefixed:
        base = prefixed.group(2) or "/"
    elif cleaned.startswith("/"):
        base = cleaned
    else:
        base = f"/{cleaned}"
    if target_locale == "en":
        return base
    return f"/{target_locale}" if base == "/" else f"/{target_locale}{base}"


def collect_import_rows(
    queue_dir: Path,
    import_dirs: list[Path] | None = None,
    issue_type_filter: set[str] | None = None,
    path_filters: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Gather queue CSV rows that carry translator output.

    Only rows with at least one filled target_* column are collected.
    Results may be narrowed by explicit directories, by issue type (dashed
    form, e.g. "wrong-language"), and by file name / full path filters.
    Each collected row gains a "__path" key recording its source file.
    """
    wanted_paths = set(path_filters or [])
    if import_dirs:
        roots = list(import_dirs)
    else:
        roots = [
            queue_dir / folder
            for folder in ("wrong-language", "mixed-placeholder", "missing-locales")
            if not issue_type_filter or folder in issue_type_filter
        ]

    collected: list[dict[str, Any]] = []
    for root in roots:
        if not (root.exists() and root.is_dir()):
            continue
        for csv_path in sorted(root.glob("*.csv")):
            # Path filters match either the bare file name or the full path.
            if wanted_paths and csv_path.name not in wanted_paths and str(csv_path) not in wanted_paths:
                continue
            with csv_path.open("r", encoding="utf-8", newline="") as handle:
                for row in csv.DictReader(handle):
                    row_issue = csv_cell(row.get("issue_type", "")).replace("_", "-")
                    if issue_type_filter and row_issue not in issue_type_filter:
                        continue
                    if not has_target_payload(row):
                        continue
                    row["__path"] = str(csv_path)
                    collected.append(row)
    return collected


def build_import_payload(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Project queue rows onto the JSON shape expected by the remote importer.

    Identifier columns are whitespace/markup-normalized; the text columns
    are passed through untouched so translator formatting survives.
    """
    normalized_keys = (
        "target_row_id",
        "status_current",
        "visibility_current",
        "source_row_id",
    )
    text_keys = (
        "target_title",
        "target_subtitle",
        "target_excerpt",
        "target_content",
        "target_meta_title",
        "target_meta_description",
        "source_title",
        "source_subtitle",
        "source_excerpt",
        "source_content",
        "source_meta_title",
        "source_meta_description",
    )
    payload: list[dict[str, Any]] = []
    for row in rows:
        item: dict[str, Any] = {
            "issue_type": row["issue_type"],
            "type": row["type"],
            "content_key": row["content_key"],
            "target_locale": row["target_locale"],
        }
        for key in normalized_keys:
            item[key] = normalize_text(row.get(key))
        for key in text_keys:
            item[key] = row.get(key, "")
        item["source_path"] = row.get("__path", "")
        payload.append(item)
    return payload


def run_remote_import(payload: list[dict[str, Any]], dry_run: bool = True) -> dict[str, Any]:
    """Apply translation payload rows to the live DB via a generated PHP script.

    Builds a one-off PHP program that boots the remote Laravel app (Console
    Kernel bootstrap), decodes the payload, and for each item either updates
    the existing ``content_items`` row matching (type, content_key,
    target_locale) — looked up by ``target_row_id`` first, then by key — or
    creates a new localized row cloned from the English source row (with a
    locale-prefixed slug via the inline ``localize_slug`` helper). Only
    non-empty ``target_*`` fields are written; for ``faq`` rows the title is
    mirrored into ``question``.

    Args:
        payload: Rows shaped like the output of the payload builder above
            (``type``, ``content_key``, ``target_locale``, ``target_*`` and
            ``source_*`` fields).
        dry_run: When True (the default) the PHP side performs no inserts or
            updates and reports ``dry-run-update`` / ``dry-run-create``
            statuses instead.

    Returns:
        The decoded JSON report echoed by the PHP script:
        ``{"dry_run": bool, "count": int, "results": [...]}``.
    """
    # Base64-encode the JSON payload so it can be embedded verbatim inside a
    # single-quoted PHP string with no escaping concerns (quotes, newlines,
    # non-ASCII text all survive the round trip).
    payload_b64 = base64.b64encode(
        json.dumps(payload, ensure_ascii=False).encode("utf-8")
    ).decode("ascii")
    # NOTE: this is an f-string — literal PHP braces are doubled ({{ }}), and
    # {payload_b64} / {str(dry_run).lower()} / {json.dumps(...)} are the only
    # interpolation points. Do not edit the template without accounting for that.
    php_code = f"""<?php
require __DIR__ . '/vendor/autoload.php';
$app = require __DIR__ . '/bootstrap/app.php';
$kernel = $app->make(Illuminate\\Contracts\\Console\\Kernel::class);
$kernel->bootstrap();

use Illuminate\\Support\\Facades\\DB;
use Illuminate\\Support\\Carbon;

function localize_slug(?string $sourceSlug, string $targetLocale): ?string {{
    if (!$sourceSlug) {{
        return null;
    }}
    $localePattern = implode('|', {json.dumps(SUPPORTED_LOCALES)});
    if (preg_match('/^\\/(' . $localePattern . ')(\\/.*)?$/', $sourceSlug, $matches)) {{
        $base = $matches[2] ?? '/';
    }} else {{
        $base = str_starts_with($sourceSlug, '/') ? $sourceSlug : '/' . $sourceSlug;
    }}
    if ($targetLocale === 'en') {{
        return $base;
    }}
    if ($base === '/') {{
        return '/' . $targetLocale;
    }}
    return '/' . $targetLocale . $base;
}}

$payload = json_decode(base64_decode('{payload_b64}'), true, 512, JSON_THROW_ON_ERROR);
$dryRun = {str(dry_run).lower()};
$results = [];

foreach ($payload as $item) {{
    $type = $item['type'];
    $contentKey = $item['content_key'];
    $locale = $item['target_locale'];
    $source = DB::table('content_items')
        ->where('type', $type)
        ->where('content_key', $contentKey)
        ->where('locale', 'en')
        ->first();

    if (!$source) {{
        $results[] = [
            'type' => $type,
            'content_key' => $contentKey,
            'locale' => $locale,
            'status' => 'error',
            'message' => 'missing english source row',
        ];
        continue;
    }}

    $current = null;
    if (!empty($item['target_row_id'])) {{
        $current = DB::table('content_items')->where('id', (int) $item['target_row_id'])->first();
    }}
    if (!$current) {{
        $current = DB::table('content_items')
            ->where('type', $type)
            ->where('content_key', $contentKey)
            ->where('locale', $locale)
            ->first();
    }}

    $updates = [];
    foreach ([
        'title' => 'target_title',
        'subtitle' => 'target_subtitle',
        'excerpt' => 'target_excerpt',
        'content' => 'target_content',
        'meta_title' => 'target_meta_title',
        'meta_description' => 'target_meta_description',
    ] as $column => $payloadKey) {{
        $value = trim((string) ($item[$payloadKey] ?? ''));
        if ($value !== '') {{
            $updates[$column] = $value;
        }}
    }}

    if ($type === 'faq' && !empty($updates['title'])) {{
        $updates['question'] = $updates['title'];
    }}

    if ($current) {{
        if (!$updates) {{
            $results[] = [
                'id' => $current->id,
                'type' => $type,
                'content_key' => $contentKey,
                'locale' => $locale,
                'status' => 'skipped',
                'message' => 'no target fields filled',
            ];
            continue;
        }}
        $updates['updated_at'] = Carbon::now();
        if (!$dryRun) {{
            DB::table('content_items')->where('id', $current->id)->update($updates);
        }}
        $results[] = [
            'id' => $current->id,
            'type' => $type,
            'content_key' => $contentKey,
            'locale' => $locale,
            'status' => $dryRun ? 'dry-run-update' : 'updated',
            'updated_fields' => array_keys($updates),
        ];
        continue;
    }}

    $title = trim((string) ($item['target_title'] ?? ''));
    if ($title === '') {{
        $results[] = [
            'type' => $type,
            'content_key' => $contentKey,
            'locale' => $locale,
            'status' => 'error',
            'message' => 'missing target_title for create',
        ];
        continue;
    }}

    $base = (array) $source;
    unset($base['id']);
    $base['locale'] = $locale;
    $base['language_summary'] = strtoupper($locale);
    $base['slug'] = localize_slug($source->slug ?? null, $locale);
    $base['status'] = $source->status ?: ($item['status_current'] ?: 'Draft');
    $base['visibility'] = $source->visibility ?: ($item['visibility_current'] ?: 'Public');
    $base['title'] = $title;
    if ($type === 'faq') {{
        $base['question'] = $title;
    }}
    foreach ([
        'subtitle' => 'target_subtitle',
        'excerpt' => 'target_excerpt',
        'content' => 'target_content',
        'meta_title' => 'target_meta_title',
        'meta_description' => 'target_meta_description',
    ] as $column => $payloadKey) {{
        $value = trim((string) ($item[$payloadKey] ?? ''));
        $base[$column] = $value !== '' ? $value : null;
    }}
    $base['created_at'] = Carbon::now();
    $base['updated_at'] = Carbon::now();

    if (!$dryRun) {{
        $insertId = DB::table('content_items')->insertGetId($base);
    }} else {{
        $insertId = null;
    }}
    $results[] = [
        'id' => $insertId,
        'type' => $type,
        'content_key' => $contentKey,
        'locale' => $locale,
        'slug' => $base['slug'],
        'status' => $dryRun ? 'dry-run-create' : 'created',
    ];
}}

echo json_encode(
    [
        'dry_run' => $dryRun,
        'count' => count($payload),
        'results' => $results,
    ],
    JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
);
"""
    # run_remote_php (defined earlier in this file) presumably runs the script
    # on the live host and returns its stdout — confirm size limits for large
    # payloads embedded in the script text.
    return json.loads(run_remote_php(php_code))


def command_audit(args: argparse.Namespace) -> int:
    """Fetch live rows, write the audit reports to disk, and print a JSON summary."""
    live_rows = fetch_live_rows()
    report = audit_to_disk(live_rows, args.output_dir, args.snapshot_path)
    summary = {
        "generated_at": now_iso(),
        "row_count": len(live_rows),
        "wrong_language_rows": len(report["wrong_language"]),
        "mixed_placeholder_rows": len(report["mixed_placeholder"]),
        "missing_locale_groups": len(report["missing_locales"]),
        "snapshot": str(args.snapshot_path),
    }
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    return 0


def load_or_fetch_snapshot(snapshot_path: Path) -> dict[str, Any]:
    """Return the cached snapshot if one exists; otherwise fetch live rows and save a fresh one."""
    if not snapshot_path.exists():
        return save_snapshot(fetch_live_rows(), snapshot_path)
    return load_snapshot(snapshot_path)


def command_pack(args: argparse.Namespace) -> int:
    """Generate the translation queue from the latest snapshot and print a JSON summary."""
    snapshot = load_or_fetch_snapshot(args.snapshot_path)
    audit = run_audit(snapshot["rows"])
    manifest = build_queue(snapshot, audit, args.queue_dir)
    # Tally manifest rows per issue bucket in a single pass.
    issue_counts = Counter(row["issue_type"] for row in manifest)
    summary = {
        "generated_at": now_iso(),
        "queue_dir": str(args.queue_dir),
        "manifest_rows": len(manifest),
        "wrong_language_groups": issue_counts["wrong-language"],
        "mixed_placeholder_groups": issue_counts["mixed-placeholder"],
        "missing_locales_groups": issue_counts["missing-locales"],
    }
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    return 0


def command_all(args: argparse.Namespace) -> int:
    """Run the full pipeline — snapshot, audit reports, and queue — and print a JSON summary."""
    live_rows = fetch_live_rows()
    snapshot = save_snapshot(live_rows, args.snapshot_path)
    report = audit_to_disk(live_rows, args.output_dir, args.snapshot_path)
    manifest = build_queue(snapshot, report, args.queue_dir)
    summary = {
        "generated_at": now_iso(),
        "row_count": len(live_rows),
        "wrong_language_rows": len(report["wrong_language"]),
        "mixed_placeholder_rows": len(report["mixed_placeholder"]),
        "missing_locale_groups": len(report["missing_locales"]),
        "manifest_rows": len(manifest),
        "queue_dir": str(args.queue_dir),
    }
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    return 0


def command_import(args: argparse.Namespace) -> int:
    """Import completed translation CSV rows into the live DB (dry-run unless --apply)."""
    wanted_issues = set(args.issue_type or [])
    extra_dirs = [Path(entry) for entry in (args.import_dir or [])]
    queue_rows = collect_import_rows(args.queue_dir, extra_dirs, wanted_issues, args.files)
    payload = build_import_payload(queue_rows)
    if not payload:
        noop = {
            "generated_at": now_iso(),
            "status": "noop",
            "message": "no completed translation rows found",
        }
        print(json.dumps(noop, ensure_ascii=False, indent=2))
        return 0

    remote_report = run_remote_import(payload, dry_run=not args.apply)
    output = {
        "generated_at": now_iso(),
        "apply": args.apply,
        "queued_rows": len(payload),
        "remote": remote_report,
    }

    if args.apply:
        # After a real write: re-sync site content, then refresh the snapshot,
        # audit reports, and queue so the remaining work reflects the import.
        subprocess.run([str(SYNC_SCRIPT)], cwd=str(PROJECT_ROOT), check=True)
        fresh_rows = fetch_live_rows()
        fresh_audit = audit_to_disk(fresh_rows, args.output_dir, args.snapshot_path)
        build_queue(save_snapshot(fresh_rows, args.snapshot_path), fresh_audit, args.queue_dir)
        output["post_import_audit"] = {
            "wrong_language_rows": len(fresh_audit["wrong_language"]),
            "mixed_placeholder_rows": len(fresh_audit["mixed_placeholder"]),
            "missing_locale_groups": len(fresh_audit["missing_locales"]),
        }

    print(json.dumps(output, ensure_ascii=False, indent=2))
    return 0


def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI with the audit / pack / all / import subcommands."""
    parser = argparse.ArgumentParser(
        description="Audit live content_items, generate translation CSV packs, and import filled translations."
    )
    parser.set_defaults(output_dir=DEFAULT_OUTPUT_DIR, queue_dir=DEFAULT_QUEUE_DIR, snapshot_path=DEFAULT_SNAPSHOT_PATH)

    subparsers = parser.add_subparsers(dest="command", required=True)

    path_defaults = {
        "--output-dir": DEFAULT_OUTPUT_DIR,
        "--queue-dir": DEFAULT_QUEUE_DIR,
        "--snapshot-path": DEFAULT_SNAPSHOT_PATH,
    }

    def add_path_options(sub: argparse.ArgumentParser, *flags: str) -> None:
        # Register the shared Path flags in the exact order given, so each
        # subcommand's --help listing stays stable.
        for flag in flags:
            sub.add_argument(flag, type=Path, default=path_defaults[flag])

    audit_parser = subparsers.add_parser("audit", help="Fetch live content_items and regenerate audit reports.")
    add_path_options(audit_parser, "--output-dir", "--snapshot-path")
    audit_parser.set_defaults(func=command_audit)

    pack_parser = subparsers.add_parser("pack", help="Generate translation_queue from the latest snapshot.")
    add_path_options(pack_parser, "--queue-dir", "--snapshot-path")
    pack_parser.set_defaults(func=command_pack)

    all_parser = subparsers.add_parser("all", help="Run audit and queue generation in one pass.")
    add_path_options(all_parser, "--output-dir", "--queue-dir", "--snapshot-path")
    all_parser.set_defaults(func=command_all)

    import_parser = subparsers.add_parser("import", help="Import completed translation_queue CSV rows back into live.")
    add_path_options(import_parser, "--queue-dir", "--output-dir", "--snapshot-path")
    import_parser.add_argument(
        "--issue-type",
        action="append",
        choices=["wrong-language", "mixed-placeholder", "missing-locales"],
        help="Limit import to one or more issue folders.",
    )
    import_parser.add_argument(
        "--files",
        nargs="*",
        help="Optional file names or full paths inside translation_queue to import.",
    )
    import_parser.add_argument(
        "--import-dir",
        action="append",
        help="Optional external directory with completed translation CSV files.",
    )
    import_parser.add_argument(
        "--apply",
        action="store_true",
        help="Actually write to live DB. Without this flag the importer runs in dry-run mode.",
    )
    import_parser.set_defaults(func=command_import)

    return parser


def main() -> int:
    """Parse CLI arguments and dispatch to the chosen subcommand.

    Any exception escaping a subcommand is reported as a JSON error document
    on stderr and converted to exit code 1.
    """
    args = build_parser().parse_args()
    try:
        return args.func(args)
    except Exception as exc:  # noqa: BLE001
        error_doc = {"status": "error", "message": str(exc)}
        print(json.dumps(error_doc, ensure_ascii=False, indent=2), file=sys.stderr)
        return 1


if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code, same as the
    # explicit raise it replaces.
    sys.exit(main())