from database.db import get_connection
import re


_STOPWORDS = {
    "и", "в", "во", "на", "с", "со", "к", "ко", "от", "до", "по",
    "за", "из", "у", "о", "об", "про", "для", "при", "без", "не",
    "нет", "ли", "же", "а", "но", "или", "то", "это", "все", "всё",
}


def _normalize_query(query: str):
    text = re.sub(r"[^\w\s]+", " ", query.lower(), flags=re.UNICODE)
    parts = [p for p in text.split() if p and p not in _STOPWORDS]
    # Keep tokens of length >= 3 to reduce noise
    return [p for p in parts if len(p) >= 3]


def _normalize_title(text: str) -> str:
    text = (text or "").strip().lower()
    text = re.sub(r"\s+", " ", text)
    return text


def _levenshtein(a: str, b: str) -> int:
    if a == b:
        return 0
    if not a:
        return len(b)
    if not b:
        return len(a)

    if len(a) < len(b):
        a, b = b, a

    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        cur = [i]
        for j, cb in enumerate(b, start=1):
            ins = cur[j - 1] + 1
            delete = prev[j] + 1
            sub = prev[j - 1] + (0 if ca == cb else 1)
            cur.append(min(ins, delete, sub))
        prev = cur
    return prev[-1]


def _fuzzy_match(token: str, word: str) -> bool:
    if token in word:
        return True
    dist = _levenshtein(token, word)
    if len(token) <= 5:
        return dist <= 1
    if len(token) <= 8:
        return dist <= 2
    return dist <= 3

def _escape_like(value: str) -> str:
    """Backslash-escape the LIKE wildcards (% and _) and the backslash itself.

    The result is meant for a ``LIKE ? ESCAPE`` clause whose escape character
    is a backslash, so that user text is matched literally.
    """
    return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def _substring_score(tokens, key, title) -> int:
    """Count the distinct tokens contained in the lowercased key or title."""
    key_l = (key or "").lower()
    title_l = (title or "").lower()
    return len({t for t in tokens if t in key_l or t in title_l})


def _fuzzy_score(tokens, key, title) -> int:
    """Count the distinct tokens that fuzzily match a word of the key or title."""
    words = (key or "").lower().split() + (title or "").lower().split()
    return len({t for t in tokens if any(_fuzzy_match(t, w) for w in words)})


def _rank_by_score(scored):
    """Turn ``(id, title, score)`` triples into a ranked ``(id, title)`` list.

    Zero scores are skipped. An id that appears several times (one row per
    key) keeps its best score. The result is ordered by score descending,
    then by id ascending.
    """
    best = {}
    titles = {}
    for iid, title, score in scored:
        if score:
            best[iid] = max(best.get(iid, 0), score)
            titles[iid] = title
    ranked = sorted(best.items(), key=lambda item: (-item[1], item[0]))
    return [(iid, titles[iid]) for iid, _ in ranked]


def find_instructions(query: str):
    """Search terminal instructions for a user query.

    The lookup strategies are tried in order and the first one that yields
    anything wins:

    1. exact match of the whole query against a ``code`` key;
    2. any query token contained in a ``text`` key, ranked by the number of
       distinct tokens found in the key or the title;
    3. the whole query contained in a ``text`` key;
    4. fuzzy (typo-tolerant) token match against the words of every ``text``
       key and title, ranked the same way as step 2.

    Args:
        query: Free-text user input. It is stripped and lowercased first.

    Returns:
        A list of ``(id, title)`` rows. Steps 1 and 3 return the rows as
        fetched from the cursor; steps 2 and 4 return plain tuples. An empty
        list is returned when nothing matches or the query is blank.
    """
    query = (query or "").strip().lower()
    if not query:
        # A blank query would make the phrase fallback below ``LIKE '%%'``,
        # which matches every text key and returns every instruction.
        return []
    tokens = _normalize_query(query)

    with get_connection() as conn:
        cur = conn.cursor()

        # 1. Exact match on an instruction code.
        cur.execute("""
            SELECT DISTINCT i.id, i.title
            FROM terminal_instruction_keys k
            JOIN terminal_instructions i ON i.id = k.instruction_id
            WHERE k.key_type = 'code' AND k.key = ?
        """, (query,))
        rows = cur.fetchall()
        if rows:
            return rows

        # 2. Partial match: any token contained in a text key.
        if tokens:
            # User text is escaped so that '%' and '_' are matched literally
            # instead of acting as LIKE wildcards.
            like_parts = " OR ".join(["k.key LIKE ? ESCAPE '\\'"] * len(tokens))
            params = [f"%{_escape_like(t)}%" for t in tokens]
            cur.execute(
                f"""
                SELECT i.id, i.title, k.key
                FROM terminal_instruction_keys k
                JOIN terminal_instructions i ON i.id = k.instruction_id
                WHERE k.key_type = 'text' AND ({like_parts})
                """,
                params,
            )
            ranked = _rank_by_score(
                (iid, title, _substring_score(tokens, key, title))
                for iid, title, key in cur.fetchall()
            )
            if ranked:
                return ranked

        # 3. The whole phrase contained in a text key.
        cur.execute("""
            SELECT DISTINCT i.id, i.title
            FROM terminal_instruction_keys k
            JOIN terminal_instructions i ON i.id = k.instruction_id
            WHERE k.key_type = 'text' AND k.key LIKE ? ESCAPE '\\'
        """, (f"%{_escape_like(query)}%",))

        rows = cur.fetchall()
        if rows:
            return rows

        # 4. Fuzzy keyword search (tolerates typos). This scans every text
        # key, so it is kept as the last resort.
        if tokens:
            cur.execute("""
                SELECT i.id, i.title, k.key
                FROM terminal_instruction_keys k
                JOIN terminal_instructions i ON i.id = k.instruction_id
                WHERE k.key_type = 'text'
            """)
            ranked = _rank_by_score(
                (iid, title, _fuzzy_score(tokens, key, title))
                for iid, title, key in cur.fetchall()
            )
            if ranked:
                return ranked

        return []

def get_terminal_steps(instruction_id: int):
    """Fetch the steps of one terminal instruction.

    Returns the ``(type, content)`` rows of ``terminal_instruction_steps``
    that belong to *instruction_id*, ordered by ``step_order``.
    """
    params = (instruction_id,)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            SELECT type, content
            FROM terminal_instruction_steps
            WHERE instruction_id = ?
            ORDER BY step_order
        """, params)
        return cursor.fetchall()


def get_instruction_id_by_title(title: str):
    """Look up an instruction id by title, ignoring case and extra whitespace.

    Both *title* and every stored title are passed through
    ``_normalize_title`` before they are compared.

    Returns the id of the first matching row, or ``None`` when the
    normalized title is empty or no row matches.
    """
    wanted = _normalize_title(title)
    if not wanted:
        return None

    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            SELECT id, title
            FROM terminal_instructions
        """)
        matching_ids = (
            iid
            for iid, db_title in cursor.fetchall()
            if _normalize_title(db_title) == wanted
        )
        return next(matching_ids, None)


def find_tech_problems(query: str):
    """Rank technical problems by how many query tokens hit their keywords.

    The query is tokenized with ``_normalize_query``. Each problem's
    ``keywords`` field is split on commas and whitespace, and a token counts
    as a hit when it fuzzily matches any keyword.

    Returns a list of ``(id, task_type)`` tuples ordered by the number of
    distinct tokens hit (descending), then by id (ascending). The list is
    empty when the query has no usable tokens or nothing matches.
    """
    tokens = _normalize_query(query)
    if not tokens:
        return []

    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            SELECT id, task_type, keywords
            FROM tech_problems
        """)
        problems = cursor.fetchall()

    best = {}
    task_types = {}
    for pid, task_type, keywords in problems:
        words = [w for w in re.split(r"[,\s]+", (keywords or "").lower()) if w]
        hits = {t for t in tokens if any(_fuzzy_match(t, w) for w in words)}
        if not hits:
            continue
        best[pid] = max(best.get(pid, 0), len(hits))
        task_types[pid] = task_type

    # With no hits at all, ``best`` is empty and so is the result.
    ranked_ids = sorted(best, key=lambda pid: (-best[pid], pid))
    return [(pid, task_types[pid]) for pid in ranked_ids]


def get_tech_solution(problem_id: str):
    """Fetch the solution record stored for *problem_id*.

    Returns the first ``tech_problem_solutions`` row with that
    ``problem_id`` (columns in the order listed in the SELECT below), or
    whatever the cursor's ``fetchone()`` gives when there is none.
    """
    params = (problem_id,)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            SELECT problem_id, problem_name, task_type, can_fix_self,
                   need_result_feedback, solution_steps, tools_needed,
                   when_stop_and_report
            FROM tech_problem_solutions
            WHERE problem_id = ?
        """, params)
        return cursor.fetchone()


def get_tech_problem_by_name(problem_name: str):
    """Find a solution record by name, ignoring case and extra whitespace.

    Both *problem_name* and every stored name are passed through
    ``_normalize_title`` before they are compared.

    Returns ``(problem_id, problem_name, task_type)`` for the first matching
    row, or ``None`` when the normalized name is empty or no row matches.
    """
    wanted = _normalize_title(problem_name)
    if not wanted:
        return None

    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            SELECT problem_id, problem_name, task_type
            FROM tech_problem_solutions
        """)
        for row_id, row_name, row_type in cursor.fetchall():
            if _normalize_title(row_name) != wanted:
                continue
            return row_id, row_name, row_type
    return None


def find_tech_solutions_by_name_contains(fragment: str):
    """Find solution records whose name contains *fragment*.

    The fragment and every stored name are normalized with
    ``_normalize_title`` (trimmed, lowercased, whitespace collapsed) and
    compared in Python, the same way ``get_tech_problem_by_name`` compares
    names.

    Args:
        fragment: Part of a problem name to look for.

    Returns:
        A list of the ``(problem_id, problem_name, task_type)`` rows, as
        fetched from the cursor, whose normalized name contains the
        normalized fragment. Empty when the fragment is blank or nothing
        matches.
    """
    frag = _normalize_title(fragment)
    if not frag:
        return []
    with get_connection() as conn:
        cur = conn.cursor()
        cur.execute(
            """
            SELECT problem_id, problem_name, task_type
            FROM tech_problem_solutions
            """
        )
        # Filtering happens in Python rather than with
        # ``lower(problem_name) LIKE ?`` for two reasons:
        #   * assuming the SQLite backend suggested by the '?' placeholders,
        #     the built-in lower() folds ASCII only, so names containing
        #     uppercase non-ASCII (e.g. Cyrillic) letters never matched a
        #     lowercased fragment;
        #   * '%' and '_' in the fragment acted as LIKE wildcards.
        # A '%fragment%' pattern already required scanning the whole table.
        return [row for row in cur.fetchall() if frag in _normalize_title(row[1])]


def set_tech_problem_progress(user_id: int, problem_id: str, task_type: str, need_result_feedback: str):
    """Store the tech-problem progress of *user_id*, replacing any old row.

    Inserts into ``tech_problem_progress``; on a ``user_id`` conflict the
    existing row's other three columns are overwritten with the new values.
    The change is committed before returning.
    """
    values = (user_id, problem_id, task_type, need_result_feedback)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            INSERT INTO tech_problem_progress (user_id, problem_id, task_type, need_result_feedback)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(user_id) DO UPDATE SET
                problem_id = excluded.problem_id,
                task_type = excluded.task_type,
                need_result_feedback = excluded.need_result_feedback
        """, values)
        db.commit()


def get_tech_problem_progress(user_id: int):
    """Read the stored tech-problem progress of *user_id*.

    Returns the ``(problem_id, task_type, need_result_feedback)`` row from
    ``tech_problem_progress``, or whatever the cursor's ``fetchone()`` gives
    when the user has no row.
    """
    params = (user_id,)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            SELECT problem_id, task_type, need_result_feedback
            FROM tech_problem_progress
            WHERE user_id = ?
        """, params)
        return cursor.fetchone()


def clear_tech_problem_progress(user_id: int):
    """Delete the tech-problem progress row of *user_id* and commit."""
    params = (user_id,)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("DELETE FROM tech_problem_progress WHERE user_id = ?", params)
        db.commit()

def set_instruction_progress(user_id: int, instruction_id: int, next_step: int, pause_at_end: bool):
    """Store the instruction progress of *user_id*, replacing any old row.

    Inserts into ``instruction_progress``; on a ``user_id`` conflict the
    existing row's other three columns are overwritten. ``pause_at_end`` is
    written as the integer 1 or 0. The change is committed before returning.
    """
    pause_flag = int(bool(pause_at_end))
    values = (user_id, instruction_id, next_step, pause_flag)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            INSERT INTO instruction_progress (user_id, instruction_id, next_step, pause_at_end)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(user_id) DO UPDATE SET
                instruction_id = excluded.instruction_id,
                next_step = excluded.next_step,
                pause_at_end = excluded.pause_at_end
        """, values)
        db.commit()

def get_instruction_progress(user_id: int):
    """Read the stored instruction progress of *user_id*.

    Returns the ``(instruction_id, next_step, pause_at_end)`` row from
    ``instruction_progress``, or whatever the cursor's ``fetchone()`` gives
    when the user has no row.
    """
    params = (user_id,)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("""
            SELECT instruction_id, next_step, pause_at_end
            FROM instruction_progress
            WHERE user_id = ?
        """, params)
        return cursor.fetchone()

def clear_instruction_progress(user_id: int):
    """Delete the instruction progress row of *user_id* and commit."""
    params = (user_id,)
    with get_connection() as db:
        cursor = db.cursor()
        cursor.execute("DELETE FROM instruction_progress WHERE user_id = ?", params)
        db.commit()