# ParamManagerScripts/backend/script_groups/XML Parser to SCL/parsers/parse_stl.py

# ToUpload/parsers/parse_stl.py
# -*- coding: utf-8 -*-
from lxml import etree
import traceback
import re  # Needed for substitutions in get_access_text_stl

# Importar desde las utilidades del parser
# ns y get_multilingual_text son necesarios
from .parser_utils import ns, get_multilingual_text

# --- Funciones Auxiliares de Reconstrucción STL ---


# Literal prefixes for date/time constant types, keyed by lowercase type name.
_STL_TIME_PREFIXES = {
    "time": "T#",
    "s5time": "S5T#",
    "date": "D#",
    "dt": "DT#",
    "time_of_day": "TOD#",
    "tod": "TOD#",
    "ltime": "LT#",
    "dtl": "DTL#",
}

# Prefixes written before the quoted text of string-like constants.
_STL_TEXT_PREFIXES = {
    "string": "",
    "char": "",
    "wstring": "WSTRING#",
    "wchar": "WCHAR#",
}

# Prefixes for bit-string types; applied only to hexadecimal ("16#...") values.
_STL_HEX_PREFIXES = {"byte": "B#", "word": "W#", "dword": "DW#", "lword": "LW#"}

# Width letter for indirect addressing, keyed by the XML Width attribute.
_STL_INDIRECT_WIDTHS = {
    "Bit": "X",
    "Byte": "B",
    "Word": "W",
    "Double": "D",
    "Long": "D",
}

# Width letter for absolute addresses, keyed by the lowercase Type attribute.
# Types that are not listed are addressed as bits ("X").
_STL_ADDRESS_WIDTHS = {
    **dict.fromkeys(("byte", "sint", "usint"), "B"),
    **dict.fromkeys(("word", "int", "uint", "timer", "counter"), "W"),
    **dict.fromkeys(
        ("dword", "dint", "udint", "real", "time", "dt", "tod", "date_and_time"),
        "D",
    ),
    # 64-bit types are rendered with "D" as well.
    **dict.fromkeys(
        (
            "lreal",
            "ltime",
            "lword",
            "lint",
            "ulint",
            "ltod",
            "ldt",
            "date_and_ltime",
        ),
        "D",
    ),
}

# XML Area attribute -> STL area mnemonic. Unknown areas are used verbatim.
_STL_AREA_CODES = {
    "Input": "I",
    "Output": "Q",
    "Memory": "M",
    "PeripheryInput": "PI",
    "PeripheryOutput": "PQ",
    "DB": "DB",
    "DI": "DI",
    "Local": "L",
    "Timer": "T",
    "Counter": "C",
}


def _stl_symbol_text(symbol_node):
    """Render a <Symbol> as a dotted component path with quotes and indices."""
    parts = []
    components = symbol_node.xpath("./stl:Component", namespaces=ns)
    for position, comp in enumerate(components):
        name = comp.get("Name", "_ERR_COMP_")

        # HasQuotes may sit on the component itself or, as a BooleanAttribute,
        # on the nearest enclosing <Access>; either one forces quoting.
        has_quotes = comp.get("HasQuotes", "false").lower() == "true"
        if not has_quotes:
            owner = comp.xpath("ancestor::stl:Access[1]", namespaces=ns)
            if owner:
                flag = owner[0].xpath(
                    "./stl:BooleanAttribute[@Name='HasQuotes']/text()", namespaces=ns
                )
                has_quotes = bool(flag) and flag[0].strip().lower() == "true"

        if position > 0:
            parts.append(".")  # separator between structure members

        # The first component is also quoted automatically unless it starts
        # with "#", already contains a quote, or contains a dot.
        auto_quote = (
            position == 0
            and not name.startswith("#")
            and '"' not in name
            and "." not in name
        )
        parts.append(f'"{name}"' if has_quotes or auto_quote else name)

        # Array indices are <Access> nodes nested inside the component.
        index_nodes = comp.xpath("./stl:Access", namespaces=ns)
        if index_nodes:
            indices = [
                get_access_text_stl(node).replace("\n", "").strip()
                for node in index_nodes
            ]
            parts.append(f"[{','.join(indices)}]")

    return "".join(parts)


def _stl_constant_text(constant_node):
    """Render a <Constant> as an STL literal with the prefix its type requires."""
    type_text = constant_node.xpath("./stl:ConstantType/text()", namespaces=ns)
    value_text = constant_node.xpath("./stl:ConstantValue/text()", namespaces=ns)
    const_type = type_text[0].strip().lower() if type_text else ""
    const_val = value_text[0].strip() if value_text else "_ERR_CONST_"

    if const_type in _STL_TIME_PREFIXES:
        return f"{_STL_TIME_PREFIXES[const_type]}{const_val}"

    if const_type in _STL_TEXT_PREFIXES:
        # Embedded single quotes are escaped by doubling them.
        escaped = const_val.replace("'", "''")
        return f"{_STL_TEXT_PREFIXES[const_type]}'{escaped}'"

    if const_val.startswith("16#") and const_type in _STL_HEX_PREFIXES:
        return f"{_STL_HEX_PREFIXES[const_type]}{const_val}"

    if (
        const_type in ("real", "lreal")
        and "." not in const_val
        and "e" not in const_val.lower()
    ):
        # Append ".0" only when the text really parses as a number.
        try:
            float(const_val)
        except ValueError:
            return const_val
        return f"{const_val}.0"

    return const_val


def _stl_label_text(label_node):
    """Return the Name of a <Label> (jump target)."""
    return label_node.get("Name", "_ERR_LABEL_")


def _stl_indirect_text(indirect_node):
    """Render an <Indirect> access, e.g. DBX[AR1,P#0.0]."""
    reg = indirect_node.get("Register", "AR?")
    offset_str = indirect_node.get("BitOffset", "0")
    area = indirect_node.get("Area", "DB")
    width = indirect_node.get("Width", "X")

    try:
        byte_offset, bit_in_byte = divmod(int(offset_str), 8)
        pointer = f"P#{byte_offset}.{bit_in_byte}"
    except ValueError:
        pointer = "P#?.?"

    # Unknown widths fall back to their first letter.
    width_char = _STL_INDIRECT_WIDTHS.get(width, width[0] if width else "?")
    return f"{area}{width_char}[{reg},{pointer}]"


def _stl_address_text(address_node):
    """Render an absolute <Address>, e.g. M0.1, IW4, T5 or DB10.DBW2."""
    area = address_node.get("Area", "??")
    bit_offset_str = address_node.get("BitOffset", "0")
    addr_type = address_node.get("Type", "Bool")
    block_num = address_node.get("BlockNumber")

    try:
        byte_offset, bit_in_byte = divmod(int(bit_offset_str), 8)
    except ValueError:
        return f"{area}?{bit_offset_str}?"  # malformed offset

    addr_width = _STL_ADDRESS_WIDTHS.get(addr_type.lower(), "X")
    stl_area = _STL_AREA_CODES.get(area, area)

    if stl_area in ("T", "C"):
        # Timers and counters are rendered as area + number only; the number
        # is taken from the byte part of the offset.
        return f"{stl_area}{byte_offset}"

    # Only bit accesses carry a ".bit" suffix (DBX0.1 but DBW2, MB10, IW0).
    if addr_width == "X":
        location = f"{byte_offset}.{bit_in_byte}"
    else:
        location = f"{byte_offset}"

    if stl_area in ("DB", "DI"):
        # Fully qualified form when a block number is given: DB10.DBX0.0
        db_prefix = f"{stl_area}{block_num}." if block_num else ""
        return f"{db_prefix}{stl_area}{addr_width}{location}"

    return f"{stl_area}{addr_width}{location}"


def _stl_call_operand_text(call_node):
    """Render the operand of a CALL described by <CallInfo>."""
    name = call_node.get("Name", "_ERR_CALL_")
    if call_node.get("BlockType", "FC") == "FB":
        # For an FB the operand is the first component name found below the
        # CallInfo node (the instance), not the FB name itself.
        instance_names = call_node.xpath(".//stl:Component/@Name", namespaces=ns)
        if not instance_names:
            return f'"_ERR_FB_INSTANCE_NAME_({name})_"'
        name = instance_names[0]
    # Quote the name unless it already carries quotes.
    return name if '"' in name else f'"{name}"'


def get_access_text_stl(access_element):
    """
    Rebuild the STL text of an operand from its <Access> element.

    The first matching child decides the rendering, checked in this order:
    Symbol, Constant, Label, Indirect, Address, CallInfo.

    Args:
        access_element: the <Access> element (queried through ``xpath`` with
            the ``stl`` namespace prefix), or None.

    Returns:
        The operand text. Problems are reported in-band with placeholders:
        "_ERR_ACCESS_" when ``access_element`` is None, "_ERR_*_" markers for
        missing attributes, and "_<Scope>_?" when no known child is present.
    """
    if access_element is None:
        return "_ERR_ACCESS_"

    renderers = (
        ("Symbol", _stl_symbol_text),
        ("Constant", _stl_constant_text),
        ("Label", _stl_label_text),
        ("Indirect", _stl_indirect_text),
        ("Address", _stl_address_text),
        ("CallInfo", _stl_call_operand_text),
    )
    for tag, render in renderers:
        found = access_element.xpath(f"./stl:{tag}", namespaces=ns)
        if found:
            return render(found[0])

    # Unrecognised access kind: report its Scope so it can be tracked down.
    scope = access_element.get("Scope", "UnknownScope")
    return f"_{scope}_?"


def get_comment_text_stl(comment_element):
    """
    Return the text of an STL comment element, or "" when none is given.

    Extraction is delegated to get_multilingual_text, which is presumed to
    understand both <Comment> and <LineComment> nodes (verify in parser_utils).
    """
    if comment_element is None:
        return ""
    return get_multilingual_text(comment_element)


# Comments that belong to the statement itself (rendered as full "//" lines).
_STL_LEADING_COMMENTS_XPATH = (
    "child::stl:Comment[not(@Inserted='true')]"
    " | child::stl:LineComment[not(@Inserted='true')]"
)

# Comments flagged Inserted="true" directly under a node (rendered inline).
_STL_INSERTED_COMMENTS_XPATH = (
    "child::stl:Comment[@Inserted='true']"
    " | child::stl:LineComment[@Inserted='true']"
)

# Token texts that are not emitted verbatim. "COMMENT" produces no instruction.
_STL_TOKEN_MNEMONICS = {
    "COMMENT": "",
    "Assign": "=",
    "OPEN_DB": "AUF",
    "OPEN_DI": "AUF DI",
}


def _stl_inline_comments(node):
    """Return the Inserted="true" comments under ``node`` as "// text" items."""
    fragments = []
    for comment in node.xpath(_STL_INSERTED_COMMENTS_XPATH, namespaces=ns):
        # Fold line breaks into spaces: an inline comment must stay on one
        # line, otherwise its continuation would be emitted as code.
        pieces = (
            piece.strip() for piece in get_comment_text_stl(comment).splitlines()
        )
        fragments.append("// " + " ".join(piece for piece in pieces if piece))
    return fragments


def _stl_statement_line(label, instruction, operand):
    """Lay out label, instruction and operand on one line ("" if all empty)."""
    if label and instruction:
        # Label padded to 8 columns, then two tabs before the instruction.
        line = f"{label:<8}\t\t{instruction}"
        return f"{line}\t{operand}" if operand else line
    if label:
        return f"{label}\t{operand}" if operand else label
    if instruction:
        return f"\t{instruction}\t{operand}" if operand else f"\t{instruction}"
    return f"\t{operand}" if operand else ""


def reconstruct_stl_from_statementlist(statement_list_node):
    """
    Rebuild STL source text from a <StatementList> element.

    Each <StlStatement> child contributes, in order: its own comments as full
    "// ..." lines, then one code line built from the optional label, the
    instruction token and the operand, followed by any inline comments.
    An "EMPTY_LINE" token yields a single blank line (consecutive blanks are
    collapsed). A statement that carries only inline comments is emitted as a
    comment line so that its text is not lost.

    Args:
        statement_list_node: the <StatementList> element, or None.

    Returns:
        The reconstructed lines joined with "\\n", or an error comment line
        when ``statement_list_node`` is None.
    """
    if statement_list_node is None:
        return "// Error: StatementList node not found.\n"

    stl_lines = []
    for stmt in statement_list_node.xpath("./stl:StlStatement", namespaces=ns):
        # 1. Full-line comments attached to the statement.
        for comment in stmt.xpath(_STL_LEADING_COMMENTS_XPATH, namespaces=ns):
            text = get_comment_text_stl(comment)
            if text:
                stl_lines.extend(f"// {line.strip()}" for line in text.splitlines())

        inline = []  # inline comments collected from label, token and operand

        # 2. Optional jump label.
        label = ""
        label_decl = stmt.xpath("./stl:LabelDeclaration", namespaces=ns)
        if label_decl:
            names = label_decl[0].xpath("./stl:Label/@Name", namespaces=ns)
            if names:
                label = f"{names[0]}:"
            inline.extend(_stl_inline_comments(label_decl[0]))

        # 3. Instruction token.
        instruction = ""
        tokens = stmt.xpath("./stl:StlToken", namespaces=ns)
        if tokens:
            token_text = tokens[0].get("Text", "_ERR_TOKEN_")
            if token_text == "EMPTY_LINE":
                # Avoid stacking several blank lines.
                if not stl_lines or stl_lines[-1]:
                    stl_lines.append("")
                continue  # nothing else to render for this statement
            instruction = _STL_TOKEN_MNEMONICS.get(token_text, token_text)
            inline.extend(_stl_inline_comments(tokens[0]))

        # 4. Operand.
        operand = ""
        accesses = stmt.xpath("./stl:Access", namespaces=ns)
        if accesses:
            operand = get_access_text_stl(accesses[0])
            inline.extend(_stl_inline_comments(accesses[0]))

        line = _stl_statement_line(label, instruction, operand)
        trailing = " ".join(inline).strip()
        if line:
            if trailing:
                line += f"\t{trailing}"
            if line.strip():
                stl_lines.append(line.rstrip())
        elif trailing:
            # No code on this statement: keep the comment text on its own line.
            stl_lines.append(trailing)

    return "\n".join(stl_lines)


# --- Función Principal del Parser STL (Corregida v4) ---


def parse_stl_network(network_element):
    """
    Parse one STL network into the dictionary stored in the output JSON.

    The first NetworkSource/StatementList pair found below the network (by
    local name, ignoring namespaces) is turned into STL text. A missing node
    or an exception is reported on stdout, recorded under the "error" key and
    replaced by a "// ..." explanation in the "stl" field; no exception is
    propagated from the lookup/reconstruction step.

    Args:
        network_element: the network element; its "ID" attribute identifies
            the network.

    Returns:
        dict with "id", "language", "title", "comment", "logic" (a single
        RAW_STL_CHUNK entry) and, on failure only, "error".
    """
    net_id = network_element.get("ID", "UnknownSTL_ID")
    stl_text = "// STL extraction failed: Reason unknown.\n"
    failure = None

    try:
        stmt_list = None
        sources = network_element.xpath(".//*[local-name()='NetworkSource']")
        if not sources:
            failure = "NetworkSource node not found using local-name()."
        else:
            candidates = sources[0].xpath(".//*[local-name()='StatementList']")
            if candidates:
                stmt_list = candidates[0]
            else:
                failure = "StatementList node not found inside NetworkSource."

        if failure:
            print(f"Advertencia: {failure} (Red ID={net_id})")

        # Reconstruct only when a StatementList was actually located.
        if stmt_list is not None:
            stl_text = reconstruct_stl_from_statementlist(stmt_list)
        elif failure:
            stl_text = f"// STL extraction failed: {failure}\n"

    except Exception as e_parse:
        failure = f"Exception during STL network parsing: {e_parse}"
        print(f"    ERROR parseando Red {net_id} (STL): {failure}")
        traceback.print_exc()
        stl_text = f"// ERROR durante el parseo de STL: {e_parse}\n"

    # The whole network is carried as one raw STL chunk.
    raw_chunk = {
        "instruction_uid": f"STL_{net_id}",
        "type": "RAW_STL_CHUNK",
        "stl": stl_text,
    }
    network_data = {
        "id": net_id,
        "language": "STL",
        "title": f"Network {net_id}",  # default title
        "comment": "",  # default comment
        "logic": [raw_chunk],
    }
    if failure:
        network_data["error"] = f"Parser failed: {failure}"

    return network_data


# --- Función de Información del Parser ---
def get_parser_info():
    """Return the registration record for this parser: languages and entry point."""
    return dict(language=["STL"], parser_func=parse_stl_network)