"""Local Hugging Face completion playground (Gradio)."""

from __future__ import annotations

import html
import os
import threading
from typing import Any

import gradio as gr
import torch
from gradio.themes.utils import fonts
from gradio.themes.utils.colors import Color
from transformers import AutoModelForCausalLM, AutoTokenizer

from completion_html import build_completion_html

# Keyword names of the shade slots passed to ``Color``, in the order the hex
# codes are listed for each palette below.
_SHADE_SLOTS = (
    "c50",
    "c100",
    "c200",
    "c300",
    "c400",
    "c500",
    "c600",
    "c700",
    "c800",
    "c900",
    "c950",
)


def _palette(name: str, hex_codes: tuple[str, ...]) -> Color:
    """Build a ``Color`` called *name*, pairing *hex_codes* with the shade slots in order."""
    return Color(name=name, **dict(zip(_SHADE_SLOTS, hex_codes)))


# Custom hues used as the theme's primary / secondary / neutral colors.
# Some neighbouring slots intentionally repeat the same hex value.
_UZH_BLUE = _palette(
    "uzh_blue",
    (
        "#BDC9E8",  # c50
        "#BDC9E8",  # c100
        "#9DADEE",  # c200
        "#7596FF",  # c300
        "#3062FF",  # c400
        "#0028A5",  # c500
        "#001E7C",  # c600
        "#001452",  # c700
        "#001452",  # c800
        "#000A28",  # c900
        "#000000",  # c950
    ),
)
_UZH_CYAN = _palette(
    "uzh_cyan",
    (
        "#DBF4F9",  # c50
        "#DBF4F9",  # c100
        "#B7E9F4",  # c200
        "#92DFEE",  # c300
        "#4AC9E3",  # c400
        "#1EA7C4",  # c500
        "#147082",  # c600
        "#147082",  # c700
        "#0E5A66",  # c800
        "#0A3D44",  # c900
        "#05282C",  # c950
    ),
)
_UZH_GREY = _palette(
    "uzh_grey",
    (
        "#FAFAFA",  # c50
        "#EFEFEF",  # c100
        "#E7E7E7",  # c200
        "#E0E0E0",  # c300
        "#C2C2C2",  # c400
        "#A3A3A3",  # c500
        "#666666",  # c600
        "#4D4D4D",  # c700
        "#333333",  # c800
        "#1A1A1A",  # c900
        "#000000",  # c950
    ),
)

# App theme: Gradio's Default theme with the custom hues and font stacks.
# The chained ``set`` call gives stronger text contrast than the default
# neutral greys (avoids overly light labels / hints); each light-mode color
# is listed next to its dark-mode counterpart.
UZH_THEME = gr.themes.Default(
    font_mono=("ui-monospace", "Menlo", "Consolas", "monospace"),
    font=(
        fonts.GoogleFont("Source Sans 3"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ),
    neutral_hue=_UZH_GREY,
    secondary_hue=_UZH_CYAN,
    primary_hue=_UZH_BLUE,
).set(
    body_text_color="#1A1A1A",
    body_text_color_dark="#F0F0F0",
    body_text_color_subdued="#4D4D4D",
    body_text_color_subdued_dark="#D0D0D0",
    block_label_text_color="#333333",
    block_label_text_color_dark="#EFEFEF",
    block_info_text_color="#4D4D4D",
    block_info_text_color_dark="#C2C2C2",
    block_title_text_color="#1A1A1A",
    block_title_text_color_dark="#FAFAFA",
    input_placeholder_color="#666666",
    input_placeholder_color_dark="#A3A3A3",
)

# Extra stylesheet handed to ``demo.launch(css=...)``: recolors links (normal
# and hover state) inside the Gradio container.
UZH_APP_CSS = """
/* Links: UZH Blue / Blue 3 */
.gradio-container a {
  color: #0028A5;
}
.gradio-container a:hover {
  color: #3062FF;
}
"""

# Hugging Face model id that run_generate() passes to the generation code.
DEFAULT_MODEL_ID = "HuggingFaceTB/SmolLM-135M"

# Module-level cache of the loaded model. load_model() fills these while
# holding _model_lock and returns early when the same id is requested again.
_model_lock = threading.Lock()
_loaded_model_id: str | None = None  # id the cached objects were loaded from
_model: Any = None  # cached causal LM, or None before the first load
_tokenizer: Any = None  # cached tokenizer, or None before the first load


def select_device() -> torch.device:
    """Return the preferred torch device: CUDA if available, else MPS, else CPU."""
    if torch.cuda.is_available():
        device_name = "cuda"
    else:
        # Older torch builds may not expose ``torch.backends.mps`` at all.
        mps_backend = getattr(torch.backends, "mps", None)
        mps_usable = mps_backend is not None and mps_backend.is_available()
        device_name = "mps" if mps_usable else "cpu"
    return torch.device(device_name)


def load_model(model_id: str) -> None:
    """Load *model_id* into the module-level cache unless it is already there.

    The whole check-and-load sequence runs under ``_model_lock``. On a cache
    miss the tokenizer and model are fetched with ``from_pretrained``, the
    model is moved to the device chosen by ``select_device()`` and switched to
    eval mode, and the cache globals are replaced.

    Args:
        model_id: Identifier passed to ``from_pretrained``.
    """
    global _loaded_model_id, _model, _tokenizer
    with _model_lock:
        cache_is_stale = (
            _loaded_model_id != model_id or _model is None or _tokenizer is None
        )
        if not cache_is_stale:
            return

        target_device = select_device()

        fresh_tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Reuse EOS as the padding token when the tokenizer defines none.
        needs_pad_token = (
            fresh_tokenizer.pad_token_id is None
            and fresh_tokenizer.eos_token_id is not None
        )
        if needs_pad_token:
            fresh_tokenizer.pad_token = fresh_tokenizer.eos_token

        fresh_model = AutoModelForCausalLM.from_pretrained(model_id).to(target_device)
        fresh_model.eval()

        # Publish the new objects only after everything loaded successfully.
        _tokenizer, _model, _loaded_model_id = fresh_tokenizer, fresh_model, model_id


def _token_piece_text(tokenizer: Any, token_id: int) -> str:
    """Decode one token id on its own, without stripping special tokens."""
    single_token = [token_id]
    return tokenizer.decode(single_token, skip_special_tokens=False)


def generate_completion_with_metadata(
    prompt_text: str,
    model_id: str,
    temperature: float,
    max_new_tokens: int,
    top_p: float,
) -> tuple[str, list[str], list[float], list[list[dict[str, Any]]], list[bool]]:
    """Generate a completion for *prompt_text* plus per-token probability data.

    Args:
        prompt_text: Text to continue.
        model_id: Model to load (or reuse from the cache) via ``load_model``.
        temperature: Sampling temperature; ``0`` (or less) selects greedy
            decoding, in which case ``temperature`` and ``top_p`` are not
            forwarded to ``generate``.
        max_new_tokens: Upper bound on generated tokens (cast to ``int``).
        top_p: Nucleus-sampling threshold, only used when sampling.

    Returns:
        A 5-tuple ``(completion_text, token_strings, chosen_probabilities,
        top5_alternatives, chosen_in_top5_flags)``. The four lists have one
        entry per generated token. Probabilities are a softmax over the
        per-step ``scores`` returned by ``generate`` (per the transformers
        documentation these are the processed scores). Each alternatives entry
        is a list of up to five ``{"token_text", "probability"}`` dicts in
        descending probability order.

    Raises:
        ValueError: If the prompt tokenizes to zero tokens.
        RuntimeError: If the model is not loaded, ``generate`` returns no
            scores, or the number of scores differs from the number of
            generated tokens.
    """
    load_model(model_id)
    # Snapshot the globals once so the model and tokenizer used below belong together.
    model = _model
    tokenizer = _tokenizer
    if model is None or tokenizer is None:
        # Explicit check rather than ``assert`` so it still runs under ``python -O``.
        raise RuntimeError(f"Model {model_id!r} is not loaded.")
    device = next(model.parameters()).device

    encoded = tokenizer(prompt_text, return_tensors="pt", add_special_tokens=True)
    input_ids = encoded["input_ids"]
    if input_ids.shape[-1] == 0:
        # Nothing to condition on: fail early with a message the UI can show
        # instead of an opaque error from inside ``generate``.
        raise ValueError("Prompt is empty after tokenization; enter some text to complete.")
    input_ids = input_ids.to(device)
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    temperature_value = float(temperature)
    use_sampling = temperature_value > 0.0
    generate_kwargs: dict[str, Any] = {
        "max_new_tokens": int(max_new_tokens),
        "do_sample": use_sampling,
        "return_dict_in_generate": True,
        "output_scores": True,
        "pad_token_id": tokenizer.pad_token_id,
    }
    if use_sampling:
        generate_kwargs["temperature"] = temperature_value
        generate_kwargs["top_p"] = float(top_p)
    if attention_mask is not None:
        generate_kwargs["attention_mask"] = attention_mask

    with torch.inference_mode():
        outputs = model.generate(input_ids, **generate_kwargs)

    scores = outputs.scores
    if scores is None:
        raise RuntimeError("Generation did not return scores; check model.generate arguments.")

    # The returned sequence starts with the prompt; keep only the new tokens.
    prompt_length = input_ids.shape[1]
    generated_list = outputs.sequences[0, prompt_length:].tolist()

    if len(generated_list) != len(scores):
        raise RuntimeError(
            f"Score count ({len(scores)}) does not match generated tokens ({len(generated_list)})."
        )

    token_strings: list[str] = []
    chosen_probabilities: list[float] = []
    top5_alternatives: list[list[dict[str, Any]]] = []
    chosen_in_top5_flags: list[bool] = []

    for step_scores, token_id in zip(scores, generated_list):
        # Batch size is 1, so row 0 holds this step's scores over the vocabulary.
        probabilities = torch.softmax(step_scores[0].float(), dim=-1)
        top_k = min(5, probabilities.shape[-1])
        top_values, top_indices = torch.topk(probabilities, top_k)

        # One bulk transfer per tensor instead of an ``.item()`` call per element.
        top_token_ids = top_indices.tolist()
        top_probabilities = top_values.tolist()

        alternatives: list[dict[str, Any]] = [
            {
                "token_text": _token_piece_text(tokenizer, alternative_id),
                "probability": alternative_probability,
            }
            for alternative_id, alternative_probability in zip(top_token_ids, top_probabilities)
        ]

        token_strings.append(_token_piece_text(tokenizer, token_id))
        chosen_probabilities.append(float(probabilities[token_id].item()))
        top5_alternatives.append(alternatives)
        chosen_in_top5_flags.append(token_id in top_token_ids)

    completion_text = tokenizer.decode(generated_list, skip_special_tokens=True)
    return completion_text, token_strings, chosen_probabilities, top5_alternatives, chosen_in_top5_flags


def run_generate(
    user_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    original_text: str,
    completion_text: str,
    has_completion: bool,
) -> tuple[Any, ...]:
    """Click handler for the generate button.

    Runs generation with ``DEFAULT_MODEL_ID`` and returns seven values in the
    order of the handler's outputs: HTML view update, prompt textbox update,
    generate-button update, undo-button update, original text, completion
    text, and the has-completion flag.

    On success the prompt textbox is hidden and locked, the highlighted HTML
    view and the undo button are shown. On any exception a warning is raised
    in the UI, the escaped error message is shown in the HTML view, and the
    prompt stays editable.
    """
    # The previous session state is received but never consulted.
    del original_text, completion_text, has_completion

    try:
        generated_text, pieces, piece_probabilities, alternatives, in_top5 = (
            generate_completion_with_metadata(
                user_prompt,
                DEFAULT_MODEL_ID,
                temperature,
                max_new_tokens,
                top_p,
            )
        )
    except Exception as error:
        gr.Warning(f"Generation failed: {error}")
        # Escape the message: it is injected into the HTML component verbatim.
        error_markup = (
            f'<div class="completion-playground-root">Error: {html.escape(str(error))}</div>'
        )
        html_view = gr.update(value=error_markup, visible=True)
        prompt_box = gr.update(value=user_prompt, visible=True, interactive=True)
        generate_button = gr.update(interactive=True)
        undo_control = gr.update(visible=False)
        return (html_view, prompt_box, generate_button, undo_control, user_prompt, "", False)

    markup = build_completion_html(
        user_prompt, pieces, piece_probabilities, alternatives, in_top5
    )
    html_view = gr.update(value=markup, visible=True)
    prompt_box = gr.update(value=user_prompt, visible=False, interactive=False)
    generate_button = gr.update(interactive=False)
    undo_control = gr.update(visible=True)
    return (
        html_view,
        prompt_box,
        generate_button,
        undo_control,
        user_prompt,
        generated_text,
        True,
    )


def run_undo(
    original_text: str,
    completion_text: str,
    has_completion: bool,
) -> tuple[Any, ...]:
    """Click handler for the undo button: return to the editable prompt.

    Hides and clears the HTML view, shows the prompt textbox again with
    *original_text*, re-enables the generate button, hides the undo button,
    and resets the completion state to ``""`` / ``False``.
    """
    # Only the original prompt is needed to restore the editor.
    del completion_text, has_completion

    cleared_view = gr.update(value=None, visible=False)
    editable_prompt = gr.update(value=original_text, visible=True, interactive=True)
    generate_enabled = gr.update(interactive=True)
    undo_hidden = gr.update(visible=False)
    return (
        cleared_view,
        editable_prompt,
        generate_enabled,
        undo_hidden,
        original_text,
        "",
        False,
    )


# Markup handed to ``demo.launch(head=...)``: font links, the popover
# stylesheet, and a click handler that shows a popover for elements with class
# ``completion-token``. The handler reads the element's ``data-top5``
# attribute as base64-encoded JSON with the keys ``alternatives``,
# ``sampled_token`` and ``chosen_in_top5``.
# Token text is HTML-escaped (``&`` first, then ``<`` and ``>``) before it is
# written through ``innerHTML``, so entity-like tokens such as ``&lt;`` are
# shown literally instead of being decoded by the browser.
# NOTE(review): ``atob`` yields one character per byte, so non-ASCII token text
# survives only if the encoder emits ASCII-only JSON — confirm against
# ``build_completion_html``.
PLAYGROUND_HEAD = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Source+Sans+3:ital,wght@0,400;0,600;0,700;1,400&display=swap" rel="stylesheet">
<style>
/* Token highlight is applied via inline styles in JS (Gradio HTML may use Shadow DOM). */
.token-top5-popover {
  position: fixed;
  z-index: 10000;
  max-width: 28rem;
  padding: 0.65rem 0.85rem;
  font-family: "Source Sans 3", ui-sans-serif, system-ui, sans-serif;
  background: #FFFFFF;
  color: #1A1A1A;
  border: 1px solid #C2C2C2;
  border-radius: 8px;
  box-shadow: 0 8px 24px rgba(0, 0, 0, 0.12);
  font-size: 0.85rem;
  line-height: 1.45;
  display: none;
  pointer-events: auto;
}
.token-top5-popover h4,
.token-top5-popover .token-top5-sampled-heading {
  font-size: 0.68rem;
  font-weight: 600;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  color: #4D4D4D;
}
.token-top5-popover h4 {
  margin: 0 0 0.45rem 0;
}
.token-top5-popover .token-top5-sampled-heading {
  margin: 0 0 0.35rem 0;
}
.token-top5-popover .token-top5-list {
  margin: 0;
  padding: 0;
}
.token-top5-popover .token-top5-row {
  display: grid;
  grid-template-columns: minmax(0, 1fr) auto;
  gap: 0.35rem 0.5rem;
  align-items: center;
  margin: 0.28rem 0;
}
.token-top5-popover .token-top5-cell {
  min-width: 0;
  display: flex;
  flex-direction: column;
  gap: 0.2rem;
}
.token-top5-popover .token-top5-text {
  font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace;
  color: #1A1A1A;
  word-break: break-word;
  line-height: 1.25;
}
.token-top5-popover .token-top5-bar-track {
  height: 5px;
  border-radius: 3px;
  background: #E7E7E7;
  overflow: hidden;
}
.token-top5-popover .token-top5-bar {
  height: 100%;
  border-radius: 3px;
  background: linear-gradient(90deg, #3062FF, #0028A5);
  min-width: 2px;
  max-width: 100%;
}
.token-top5-popover .token-top5-met {
  font-variant-numeric: tabular-nums;
  color: #333333;
  white-space: nowrap;
  font-size: 0.8rem;
  line-height: 1.2;
}
.token-top5-popover hr.token-top5-hairline {
  border: 0;
  border-top: 1px solid #C2C2C2;
  margin: 0.65rem 0 0.45rem 0;
  padding: 0;
}
</style>
<script>
(function () {
  var popover = null;
  var activeToken = null;
  /* Escape text for innerHTML; the ampersand must be replaced first. */
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  }
  function clearTokenHighlight() {
    if (!activeToken) {
      return;
    }
    activeToken.style.removeProperty("box-shadow");
    activeToken.style.removeProperty("border-radius");
    activeToken.style.removeProperty("position");
    activeToken.style.removeProperty("z-index");
    activeToken = null;
  }
  function applyTokenHighlight(token) {
    token.style.boxShadow = "0 0 0 2px #FFC845";
    token.style.borderRadius = "2px";
    token.style.position = "relative";
    token.style.zIndex = "1";
  }
  function ensurePopover() {
    if (!popover) {
      popover = document.createElement("div");
      popover.id = "token-top5-popover";
      popover.className = "token-top5-popover";
      popover.addEventListener("click", function (e) { e.stopPropagation(); });
      document.body.appendChild(popover);
    }
    return popover;
  }
  function hidePopover() {
    if (popover) { popover.style.display = "none"; }
    clearTokenHighlight();
  }
  document.addEventListener("click", function (e) {
    var target = e.target;
    if (!target || !target.closest) { return; }
    var token = target.closest(".completion-token");
    if (!token) {
      if (popover && !popover.contains(target)) { hidePopover(); }
      return;
    }
    e.stopPropagation();
    var encoded = token.getAttribute("data-top5");
    if (!encoded) { return; }
    var jsonText;
    try {
      jsonText = atob(encoded);
    } catch (err) {
      return;
    }
    var payload;
    try {
      payload = JSON.parse(jsonText);
    } catch (err2) {
      return;
    }
    var rows = payload.alternatives || [];
    var maxProb = 0;
    for (var i = 0; i < rows.length; i++) {
      var p = Number(rows[i].probability);
      if (p > maxProb) { maxProb = p; }
    }
    var sampled = payload.sampled_token;
    var chosenInTop5 = payload.chosen_in_top5 !== false;
    var sampledProb =
      sampled && sampled.probability !== undefined && sampled.probability !== null
        ? Number(sampled.probability)
        : 0;
    if (!chosenInTop5) {
      maxProb = Math.max(maxProb, sampledProb);
    }
    if (maxProb <= 0) { maxProb = 1; }
    var listItems = rows.map(function (row) {
      var pct = (row.probability * 100).toFixed(2);
      var text = escapeHtml(row.token_text);
      var rel = Math.max(0, Math.min(100, (row.probability / maxProb) * 100));
      return (
        '<div class="token-top5-row">' +
        '<div class="token-top5-cell">' +
        '<span class="token-top5-text">' + text + "</span>" +
        '<div class="token-top5-bar-track" aria-hidden="true">' +
        '<div class="token-top5-bar" style="width:' + rel + '%"></div>' +
        "</div></div>" +
        '<span class="token-top5-met">' + pct + "%</span>" +
        "</div>"
      );
    });
    var extraSection = "";
    if (!chosenInTop5 && sampled) {
      var stext = escapeHtml(sampled.token_text || "");
      var spct = (sampledProb * 100).toFixed(2);
      var srel = Math.max(0, Math.min(100, (sampledProb / maxProb) * 100));
      extraSection =
        '<hr class="token-top5-hairline" />' +
        '<p class="token-top5-sampled-heading">Sampled token</p>' +
        '<div class="token-top5-row token-top5-row-sampled">' +
        '<div class="token-top5-cell">' +
        '<span class="token-top5-text">' +
        stext +
        "</span>" +
        '<div class="token-top5-bar-track" aria-hidden="true">' +
        '<div class="token-top5-bar" style="width:' +
        srel +
        '%"></div>' +
        "</div></div>" +
        '<span class="token-top5-met">' +
        spct +
        "%</span>" +
        "</div>";
    }
    clearTokenHighlight();
    activeToken = token;
    applyTokenHighlight(token);
    var panel = ensurePopover();
    panel.innerHTML =
      "<h4>Most probable tokens</h4>" +
      '<div class="token-top5-list">' +
      listItems.join("") +
      "</div>" +
      extraSection;
    panel.style.display = "block";
    var margin = 8;
    var x = e.clientX + margin;
    var y = e.clientY + margin;
    panel.style.left = "0px";
    panel.style.top = "0px";
    var rect = panel.getBoundingClientRect();
    if (x + rect.width > window.innerWidth - margin) {
      x = Math.max(margin, window.innerWidth - rect.width - margin);
    }
    if (y + rect.height > window.innerHeight - margin) {
      y = Math.max(margin, window.innerHeight - rect.height - margin);
    }
    panel.style.left = x + "px";
    panel.style.top = y + "px";
  });
})();
</script>
"""

# UI layout and event wiring. The left column holds the prompt textbox and the
# (initially hidden) highlighted completion view; the right column holds the
# generation settings and the generate / undo buttons.
with gr.Blocks(title="LLM Text Completion Playground") as demo:
    # Derive the header from DEFAULT_MODEL_ID so the displayed name and link
    # cannot drift from the model that is actually loaded.
    model_display_name = DEFAULT_MODEL_ID.rsplit("/", 1)[-1]
    gr.Markdown(
        "# LLM Text Completion Playground\n\n"
        f"LLM used: [{model_display_name}](https://huggingface.co/{DEFAULT_MODEL_ID})."
    )

    with gr.Row():
        with gr.Column(scale=2):
            user_input = gr.Textbox(
                label="Text to be completed",
                placeholder="Enter your text here...",
                lines=10,
                max_lines=20,
            )
            highlighted_view = gr.HTML(visible=False, container=True)

        with gr.Column(scale=1):
            gr.Markdown("### Configuration")

            max_length = gr.Slider(
                minimum=1,
                maximum=48,
                value=10,
                step=1,
                label="Maximum Length",
                info="The maximum number of tokens to generate.",
            )

            temperature = gr.Slider(
                minimum=0,
                maximum=2,
                value=1,
                step=0.01,
                label="Temperature",
                info="Controls randomness: Lowering results in less random completions.",
            )

            top_p = gr.Slider(
                minimum=0,
                maximum=1,
                value=1,
                step=0.01,
                label="Top P",
                info="Controls diversity via nucleus sampling.",
            )

            submit_button = gr.Button("Generate Completion", variant="primary")
            undo_button = gr.Button("Undo", variant="secondary", visible=False)

    # Per-session state carried between the generate and undo handlers.
    original_text_state = gr.State("")
    completion_text_state = gr.State("")
    has_completion_state = gr.State(False)

    session_state = [
        original_text_state,
        completion_text_state,
        has_completion_state,
    ]
    # Both handlers return the same seven values, in this order.
    view_outputs = [
        highlighted_view,
        user_input,
        submit_button,
        undo_button,
        *session_state,
    ]

    submit_button.click(
        fn=run_generate,
        inputs=[user_input, max_length, temperature, top_p, *session_state],
        outputs=view_outputs,
    )

    undo_button.click(
        fn=run_undo,
        inputs=session_state,
        outputs=view_outputs,
    )

if __name__ == "__main__":
    # Host and port are both environment-configurable. The default host
    # "0.0.0.0" listens on all interfaces; set GRADIO_SERVER_NAME=127.0.0.1
    # to keep the app reachable from this machine only.
    demo.launch(
        server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
        ssr_mode=False,
        head=PLAYGROUND_HEAD,
        theme=UZH_THEME,
        css=UZH_APP_CSS,
    )