import logging
import os
import re
import sys

from markdownify import markdownify as md

import cross

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")

# Alternating "word" (\S+) / "whitespace" (\s+) segments.  Used by
# split_tokens so every run of spaces/newlines is its own segment and exact
# spacing can be preserved across block boundaries.
ALTERNATE = re.compile(r'\S+|\s+')

DEFAULT_SETTINGS: dict = {
    'bluesky': {
        'quote_gate': False,
        'thread_gate': ['everybody'],
    }
}


def tokenize_html(content: str) -> list[cross.Token]:
    """Tokenize an HTML fragment by first converting it to Markdown."""
    return tokenize_markdown(md(content, autolinks=False))


def tokenize_markdown(md) -> list[cross.Token]:
    """Parse a Markdown string into a flat list of ``cross.Token`` objects.

    Recognized constructs:

    * ``![alt](url)`` -- media image syntax.  The alt text and URL are
      parsed but NO token is emitted (media is currently discarded; there
      is no media token type in ``cross``).
    * ``[text](url)`` -- emits ``TagToken`` when *text* starts with ``#``,
      ``MentionToken`` when it starts with ``@``, otherwise ``LinkToken``.
    * Anything else   -- accumulated into ``TextToken`` runs.

    A ``![alt]`` or ``[text]`` that is not followed by ``(`` falls back to
    a plain ``TextToken`` of the raw source span.

    :param md: the Markdown source string (shadows the module-level
               ``markdownify`` alias on purpose; only the string is used here).
    :return: list of tokens in source order.
    """
    tokens: list[cross.Token] = []
    i = 0
    length = len(md)
    while i < length:
        if md[i] == '!' and i + 1 < length and md[i + 1] == '[':
            # --- media: ![alt](url) ---
            start = i
            i += 2
            alt_text = ''
            while i < length and md[i] != ']':
                alt_text += md[i]
                i += 1
            i += 1  # skip ']'
            if i < length and md[i] == '(':
                i += 1
                url = ''
                while i < length and md[i] != ')':
                    url += md[i]
                    i += 1
                i += 1  # skip ')'
                # NOTE: media tokens are intentionally dropped here -- the
                # original emitted nothing for a fully-parsed image.
            else:
                # Malformed (no '(' after '![alt]'): keep the raw text.
                tokens.append(cross.TextToken(md[start:i]))
        elif md[i] == '[':
            # --- link or special: [text](url) ---
            start = i
            i += 1
            link_text = ''
            while i < length and md[i] != ']':
                link_text += md[i]
                i += 1
            i += 1  # skip ']'
            if i < length and md[i] == '(':
                i += 1
                url = ''
                while i < length and md[i] != ')':
                    url += md[i]
                    i += 1
                i += 1  # skip ')'
                if link_text.startswith('#'):
                    tokens.append(cross.TagToken(link_text[1:]))
                elif link_text.startswith('@'):
                    tokens.append(cross.MentionToken(link_text[1:], url))
                else:
                    # The original had a separate http(s) branch that built
                    # the exact same token; the two branches are merged.
                    tokens.append(cross.LinkToken(url, link_text))
            else:
                # Malformed (no '(' after '[text]'): keep the raw text.
                tokens.append(cross.TextToken(md[start:i]))
        else:
            # --- plain text: consume up to the next '[' or '![' ---
            start = i
            while i < length and md[i] != '[' and not (
                md[i] == '!' and i + 1 < length and md[i + 1] == '['
            ):
                i += 1
            tokens.append(cross.TextToken(md[start:i]))
    return tokens


def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
    """Split a token stream into blocks of at most ``max_chars`` characters.

    Counting rules:

    * ``TextToken`` text is split on word/whitespace boundaries
      (``ALTERNATE``).  Whitespace is counted and, when it does not fit,
      split across blocks so exact spacing is preserved.  A single word
      longer than ``max_chars`` is hyphenated into ``max_chars``-sized
      chunks (each chunk is ``max_chars - 1`` characters plus ``'-'``).
    * ``LinkToken`` counts as ``min(len(label), 35)`` characters
      (platforms truncate displayed link labels).
    * ``TagToken`` counts as ``1 + len(tag)`` (the ``#`` plus the tag).
    * Any other token type is appended without affecting the count.

    Adjacent text segments within a block are merged into one ``TextToken``.

    :param tokens: tokens to pack, in order.
    :param max_chars: maximum counted characters per block (must be > 0).
    :return: list of blocks, each a list of tokens.
    """
    def start_new_block():
        # Flush the current block (if non-empty) and reset the counters.
        nonlocal current_block, blocks, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text_to_block(text_segment):
        # Merge into a trailing TextToken when possible to avoid fragmentation.
        nonlocal current_block
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text_segment
        else:
            current_block.append(cross.TextToken(text_segment))

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # Alternating "words" (\S+) and "whitespace" (\s+) segments:
            # every space/newline run is treated as its own segment.
            segments: list[str] = ALTERNATE.findall(token.text)
            for seg in segments:
                if seg.isspace():
                    # Whitespace: count it; if it doesn't fully fit, split it
                    # across blocks to preserve exact spacing.
                    seg_len: int = len(seg)
                    while seg_len > 0:
                        space_left = max_chars - current_length
                        if space_left == 0:
                            start_new_block()
                            continue
                        take = min(space_left, seg_len)
                        part = seg[:take]
                        append_text_to_block(part)
                        current_length += len(part)
                        seg = seg[take:]
                        seg_len -= take
                        if current_length == max_chars:
                            start_new_block()
                else:
                    # seg is a "word" (no whitespace inside).
                    word: str = seg
                    wlen: int = len(word)
                    if wlen > max_chars:
                        # Word longer than a whole block: hyphenate.  First,
                        # close any partially-filled block.
                        if current_length > 0:
                            start_new_block()
                        remaining = word
                        # Carve off (max_chars-1)-sized chunks + '-' so each
                        # chunk exactly fills a block.
                        while len(remaining) > (max_chars - 1):
                            chunk = remaining[: max_chars - 1] + '-'
                            append_text_to_block(chunk)
                            start_new_block()
                            remaining = remaining[max_chars - 1:]
                        # Whatever remains is <= max_chars characters.
                        if remaining:
                            append_text_to_block(remaining)
                            current_length = len(remaining)
                    else:
                        # Word fits within one block.
                        if current_length + wlen <= max_chars:
                            append_text_to_block(word)
                            current_length += wlen
                        else:
                            # Not enough room here: start a new block.
                            start_new_block()
                            append_text_to_block(word)
                            current_length = wlen
        elif isinstance(token, cross.LinkToken):
            # Displayed link labels are capped at 35 characters for counting.
            link_len = min(len(token.label), 35)
            if current_length + link_len <= max_chars:
                current_block.append(token)
                current_length += link_len
            else:
                start_new_block()
                current_block.append(token)
                current_length = link_len
        elif isinstance(token, cross.TagToken):
            # A hashtag renders as "#tagname" for counting purposes.
            hashtag_len = 1 + len(token.tag)
            if current_length + hashtag_len <= max_chars:
                current_block.append(token)
                current_length += hashtag_len
            else:
                start_new_block()
                current_block.append(token)
                current_length = hashtag_len
        else:
            # Other token types are appended without affecting the length.
            current_block.append(token)

    # Flush any remaining tokens as the final block.
    if current_block:
        blocks.append(current_block)
    return blocks


def safe_get(obj: dict, key: str, default):
    """Return ``obj[key]`` unless it is missing OR falsy, else ``default``.

    Note: unlike ``dict.get``, a present-but-falsy value ('' / 0 / None /
    empty container) is replaced by ``default``.
    """
    val = obj.get(key, default)
    return val if val else default


def value_or_envvar(text: str) -> str:
    """Resolve ``'env:NAME'`` strings to ``os.environ['NAME']`` ('' if unset).

    Any other string is returned unchanged.
    """
    if text.startswith('env:'):
        return os.environ.get(text[4:], '')
    return text


def get_or_envvar(obj: dict, key: str):
    """Look up ``obj[key]`` ('' if missing) and resolve ``env:`` indirection."""
    return value_or_envvar(obj.get(key, ''))