import yaml
from copy import deepcopy

# Prompt skeleton filled in by `build_prompt` via `str.format`. Placeholders:
#   {category_lines_with_short_descriptions} - one bullet line per schema field
#   {answers_section}                        - tri-state answer rules, or ""
#   {content}                                - the report text to analyse
#   {lines_with_names_formats}               - JSON skeleton of the expected answer
# Because the text goes through `str.format`, any literal brace added to this
# template must be doubled ({{ / }}). The "Critical Hungarian scope rule" block
# appears twice; the second copy is introduced in the text with "I repeat".
prompt_template = """
You are a medical assistant. From the Hungarian medical report below, determine the presence of the following patient conditions / status indicators. A symptom should be marked only if it is explicitly stated in the text, including unambiguous synonymous medical wording. 

**Categories**

{category_lines_with_short_descriptions}

{answers_section}

Critical Hungarian scope rule:
**If a clause lists items ("comma", "és", "vagy", "szóköz", " " separated, even when the separator is missing) and later contains a cue like
"negatív/pozitív/nincs(nincsen)/nem látható/jelen van/kimutatható/nem mutat/nem igazolható/expresszál",
that cue applies to ALL items in the list within the same clause, unless a strong boundary breaks it (.; or new sentence). Clarification (additive, non-destructive):

Abstract example:
"A B C nincs" → A, B, C are all negative.

When a clause (i.e., a phrase not broken by ., ;, or a new sentence) lists multiple findings — even with missing commas or spaces — and ends with a negation like "nincs", "nem látható", "negatív", etc., the negation applies to every item in that list.**

Report:
```
{content}
```

Return the answer in the following format:

Reasoning:
<very short reasoning tied strictly to explicit text evidence>
JSON:
{lines_with_names_formats}

I repeat: Critical Hungarian scope rule:
**If a clause lists items ("comma", "és", "vagy", "szóköz", " " separated, even when the separator is missing) and later contains a cue like
"negatív/pozitív/nincs(nincsen)/nem látható/jelen van/kimutatható/nem mutat/nem igazolható/expresszál",
that cue applies to ALL items in the list within the same clause, unless a strong boundary breaks it (.; or new sentence). Clarification (additive, non-destructive):

Abstract example:
"A B C nincs" → A, B, C are all negative.

When a clause (i.e., a phrase not broken by ., ;, or a new sentence) lists multiple findings — even with missing commas or spaces — and ends with a negation like "nincs", "nem látható", "negatív", etc., the negation applies to every item in that list.**
"""


def load_schema(path="symptoms.yaml"):
    """Parse the YAML schema file at `path` and return the resulting document.

    The file is read as UTF-8 and parsed with `yaml.safe_load`; whatever that
    call produces is returned unchanged.
    """
    with open(path, encoding="utf-8") as schema_file:
        return yaml.safe_load(schema_file)


def resolve_field(field, defaults):
    """Expand a field's ``extends`` reference into a standalone definition.

    A field without an ``extends`` key is handed back as-is (same object).
    Otherwise the entry of `defaults` named by ``extends`` is deep-copied,
    the field's own keys are layered on top of it, and the ``extends`` marker
    is dropped from the result. Neither `field` nor `defaults` is modified.

    Raises:
        KeyError: if ``extends`` names an entry that is not in `defaults`.
    """
    if "extends" in field:
        inherited = deepcopy(defaults[field["extends"]])
        # Keys set on the field itself win over the inherited ones.
        return {
            name: value
            for name, value in {**inherited, **field}.items()
            if name != "extends"
        }
    return field


def _matches_group(field, gid):
    """Tell whether `field` belongs to group `gid`, tolerating int/str mismatch."""
    value = field.get("group_id")
    if value == gid:
        return True
    # The schema may store the id as "5" while the caller's value was coerced
    # to 5 (or the other way round), so compare the textual forms as well.
    # A field without a group_id never matches a requested group.
    return value is not None and str(value) == str(gid)


def _is_tristate(field):
    """Tell whether `field` is an enum with exactly Yes / No / Not mentioned."""
    if field.get("type") != "enum":
        return False
    # `values: null` in the schema comes back as None; treat it as empty.
    return set(field.get("values") or []) == {"Yes", "No", "Not mentioned"}


def _category_line(field):
    """Render the bullet line describing `field` in the categories section."""
    label = field["label"]
    desc = field.get("description", "")
    aliases = field.get("aliases_hu") or []
    if isinstance(aliases, str):
        # A scalar alias would otherwise be joined character by character.
        aliases = [aliases]

    alias_text = ""
    if aliases:
        alias_text = "HU aliases: " + ", ".join(str(alias) for alias in aliases)

    if desc and aliases:
        return f"- {label} ({desc} {alias_text})"
    if desc:
        return f"- {label} ({desc})"
    if aliases:
        # Aliases are still shown when the description is missing.
        return f"- {label} ({alias_text.strip()})"
    return f"- {label}"


def build_sections(schema, group_id=None):
    """Build the schema-derived pieces of the prompt.

    Args:
        schema: Parsed schema mapping. Its optional ``defaults`` mapping and
            ``fields`` list are read; a missing or null entry (or a null
            schema) is treated as empty.
        group_id: When not None, only fields whose ``group_id`` matches this
            value are included. Integers and numeric strings are treated as
            interchangeable on both the caller's and the schema's side.

    Returns:
        A ``(categories_text, json_text, has_tristate)`` tuple: the bullet
        list of categories, the JSON answer skeleton, and whether any selected
        field is a Yes/No/Not mentioned enum.

    Raises:
        KeyError: if a selected field lacks ``label`` or ``key``.
    """
    schema = schema or {}
    defaults = schema.get("defaults") or {}
    fields = schema.get("fields") or []

    resolved_all = [resolve_field(f, defaults) for f in fields]

    # Optional filtering by group_id
    if group_id is None:
        resolved = resolved_all
    else:
        # Accept the id as int or str; normalise numeric strings to int first.
        try:
            gid = int(group_id)
        except (TypeError, ValueError):
            gid = group_id
        resolved = [f for f in resolved_all if _matches_group(f, gid)]

    has_tristate = any(_is_tristate(f) for f in resolved)

    # Categories section
    categories_text = "\n".join(_category_line(f) for f in resolved)

    # JSON skeleton
    json_lines = []
    for f in resolved:
        key = f["key"]
        hint = f.get("format_hint", "<value>")
        json_lines.append(f'  "{key}": "{hint}"')

    json_text = "{\n" + ",\n".join(json_lines) + "\n}"

    return categories_text, json_text, has_tristate


def build_prompt(report_text, schema_path="symptoms.yaml", group_id=None):
    """Assemble the full prompt for one report.

    The schema at `schema_path` is loaded, turned into the categories list and
    JSON skeleton (optionally restricted to `group_id`), and substituted into
    `prompt_template` together with `report_text`. The tri-state answer rules
    are included only when at least one selected field is tri-state.
    """
    categories_text, json_text, has_tristate = build_sections(
        load_schema(schema_path), group_id=group_id
    )

    # No tri-state field selected: the answers section stays empty.
    answers_section = ""
    if has_tristate:
        answers_section = """**Answers** (only for fields whose expected format is `Yes/No/Not mentioned`)

- `Yes`  (requires a clear positive mention; if it is only loosely indicated that's not enough evidence)
- `No`   (requires a clear negative mention; if it is only loosely indicated that's not enough evidence)
- `Not mentioned` (if the symptom is not explicitly referenced, or just very weakly implied)

For non-tri-state fields (e.g., numeric or free-text fields), follow the field’s required output format shown in the JSON template (e.g., return a number or `null`, or return short bullet points / `No info`).
"""

    substitutions = {
        "category_lines_with_short_descriptions": categories_text,
        "answers_section": answers_section,
        "content": report_text,
        "lines_with_names_formats": json_text,
    }
    return prompt_template.format(**substitutions)


if __name__ == "__main__":
    # Demo run: print the prompt for a tiny Hungarian snippet, restricted to
    # the schema fields whose group_id is 5.
    demo_report = "CD10 pozitív, CD56 nincs kimutatható."
    print(build_prompt(report_text=demo_report, group_id=5))