from html.parser import HTMLParser

import cross


class HTMLPostTokenizer(HTMLParser):
    def __init__(self) -> None:
        super().__init__()
        self.tokens: list[cross.Token] = []

        self.mentions: list[tuple[str, str]]
        self.tags: list[str]

        self.in_pre = False
        self.in_code = False

        self.current_tag_stack = []
        self.list_stack = []

        self.anchor_stack = []
        self.anchor_data = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        attrs_dict = dict(attrs)

        def append_newline():
            if self.tokens:
                last_token = self.tokens[-1]
                if isinstance(
                    last_token, cross.TextToken
                ) and not last_token.text.endswith("\n"):
                    self.tokens.append(cross.TextToken("\n"))

        match tag:
            case "br":
                self.tokens.append(cross.TextToken("  \n"))
            case "a":
                href = attrs_dict.get("href", "")
                self.anchor_stack.append(href)
            case "strong", "b":
                self.tokens.append(cross.TextToken("**"))
            case "em", "i":
                self.tokens.append(cross.TextToken("*"))
            case "del", "s":
                self.tokens.append(cross.TextToken("~~"))
            case "code":
                if not self.in_pre:
                    self.tokens.append(cross.TextToken("`"))
                    self.in_code = True
            case "pre":
                append_newline()
                self.tokens.append(cross.TextToken("```\n"))
                self.in_pre = True
            case "blockquote":
                append_newline()
                self.tokens.append(cross.TextToken("> "))
            case "ul", "ol":
                self.list_stack.append(tag)
                append_newline()
            case "li":
                indent = "  " * (len(self.list_stack) - 1)
                if self.list_stack and self.list_stack[-1] == "ul":
                    self.tokens.append(cross.TextToken(f"{indent}- "))
                elif self.list_stack and self.list_stack[-1] == "ol":
                    self.tokens.append(cross.TextToken(f"{indent}1. "))
            case _:
                if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
                    level = int(tag[1])
                    self.tokens.append(cross.TextToken("\n" + "#" * level + " "))

        self.current_tag_stack.append(tag)

    def handle_data(self, data: str) -> None:
        if self.anchor_stack:
            self.anchor_data.append(data)
        else:
            self.tokens.append(cross.TextToken(data))

    def handle_endtag(self, tag: str) -> None:
        if not self.current_tag_stack:
            return

        if tag in self.current_tag_stack:
            self.current_tag_stack.remove(tag)

        match tag:
            case "p":
                self.tokens.append(cross.TextToken("\n\n"))
            case "a":
                href = self.anchor_stack.pop()
                anchor_data = "".join(self.anchor_data)
                self.anchor_data = []

                if anchor_data.startswith("#"):
                    as_tag = anchor_data[1:].lower()
                    if any(as_tag == block for block in self.tags):
                        self.tokens.append(cross.TagToken(anchor_data[1:]))
                elif anchor_data.startswith("@"):
                    match = next(
                        (pair for pair in self.mentions if anchor_data in pair), None
                    )

                    if match:
                        self.tokens.append(cross.MentionToken(match[1], ""))
                else:
                    self.tokens.append(cross.LinkToken(href, anchor_data))
            case "strong", "b":
                self.tokens.append(cross.TextToken("**"))
            case "em", "i":
                self.tokens.append(cross.TextToken("*"))
            case "del", "s":
                self.tokens.append(cross.TextToken("~~"))
            case "code":
                if not self.in_pre and self.in_code:
                    self.tokens.append(cross.TextToken("`"))
                    self.in_code = False
            case "pre":
                self.tokens.append(cross.TextToken("\n```\n"))
                self.in_pre = False
            case "blockquote":
                self.tokens.append(cross.TextToken("\n"))
            case "ul", "ol":
                if self.list_stack:
                    self.list_stack.pop()
                self.tokens.append(cross.TextToken("\n"))
            case "li":
                self.tokens.append(cross.TextToken("\n"))
            case _:
                if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
                    self.tokens.append(cross.TextToken("\n"))

    def get_tokens(self) -> list[cross.Token]:
        if not self.tokens:
            return []

        combined: list[cross.Token] = []
        buffer: list[str] = []

        def flush_buffer():
            if buffer:
                merged = "".join(buffer)
                combined.append(cross.TextToken(text=merged))
                buffer.clear()

        for token in self.tokens:
            if isinstance(token, cross.TextToken):
                buffer.append(token.text)
            else:
                flush_buffer()
                combined.append(token)

        flush_buffer()

        if combined and isinstance(combined[-1], cross.TextToken):
            if combined[-1].text.endswith("\n\n"):
                combined[-1] = cross.TextToken(combined[-1].text[:-2])
        return combined

    def reset(self):
        """Reset the parser state for reuse."""
        super().reset()
        self.tokens = []

        self.mentions = []
        self.tags = []

        self.in_pre = False
        self.in_code = False

        self.current_tag_stack = []
        self.anchor_stack = []
        self.list_stack = []