# ParamManagerScripts/backend/script_groups/XML Parser to SCL/parsers/parse_scl.py

# ToUpload/parsers/parse_scl.py
# -*- coding: utf-8 -*-
from lxml import etree
import re

# Importar desde las utilidades del parser
from .parser_utils import ns, get_multilingual_text


# Access scopes whose payload is a <Symbol> made of dotted <Component> nodes.
_VARIABLE_SCOPES = frozenset(
    {
        "GlobalVariable",
        "LocalVariable",
        "TempVariable",
        "InOutVariable",
        "InputVariable",
        "OutputVariable",
        "ConstantVariable",
        "GlobalConstant",
    }
)

# Literal types rendered as "<prefix><value>" unless the value already
# starts with that prefix.
_LITERAL_PREFIXES = {
    "time": "T#",
    "ltime": "LT#",
    "s5time": "S5T#",
    "date": "D#",
    "dtl": "DTL#",
    "dt": "DT#",
    "tod": "TOD#",
}

_INDENT_UNIT = "  "  # two spaces per indentation level
_ELSIF_RE = re.compile(r"ELSIF\b")


def _st_xpath(node, rel_path):
    """Evaluate ``./st:<rel_path>`` and fall back to ``./<rel_path>`` (no namespace)."""
    hits = node.xpath(f"./st:{rel_path}", namespaces=ns)
    if not hits:
        hits = node.xpath(f"./{rel_path}")
    return hits


def _local_name(node):
    """Return the tag of *node* without its namespace."""
    return etree.QName(node.tag).localname


def _blank_width(blank_elem):
    """Return the ``Num`` attribute of a <Blank> as int, defaulting to 1 if absent or malformed."""
    try:
        return int(blank_elem.get("Num", 1))
    except (TypeError, ValueError):
        return 1


def _infer_literal_type(const_val):
    """Guess a literal type from its text when the XML carries no ConstantType."""
    if const_val.lower() in ("true", "false"):
        return "bool"
    if const_val.startswith("'") and const_val.endswith("'"):
        return "string"
    if const_val.isdigit() or (const_val.startswith("-") and const_val[1:].isdigit()):
        return "int"
    if "." in const_val:
        return "real"
    return ""  # unknown: the value is emitted verbatim


def _format_literal(const_val, const_type):
    """Format a literal value as SCL text according to its (lower-case) type name."""
    if const_type == "bool":
        return const_val.upper()
    if const_type in ("string", "char"):
        if const_val.startswith("'") and const_val.endswith("'"):
            return const_val  # already quoted
        return "'" + const_val.replace("'", "''") + "'"
    if const_type == "wstring":
        return "WSTRING#'" + const_val.replace("'", "''") + "'"
    if const_type == "wchar":
        return "WCHAR#'" + const_val.replace("'", "''") + "'"
    prefix = _LITERAL_PREFIXES.get(const_type)
    if prefix is not None:
        return const_val if const_val.startswith(prefix) else prefix + const_val
    if (
        const_type in ("real", "lreal")
        and "." not in const_val
        and "e" not in const_val.lower()
    ):
        return f"{const_val}.0"  # make reals visibly real
    return const_val


def _render_literal(access):
    """Render an <Access Scope="LiteralConstant"> node."""
    constants = _st_xpath(access, "Constant")
    if not constants:
        return "/*_ERR_NOCONST_*/"
    constant = constants[0]

    value_texts = _st_xpath(constant, "ConstantValue/text()")
    type_texts = _st_xpath(constant, "ConstantType/text()")

    if value_texts and value_texts[0] is not None:
        const_val = value_texts[0].strip()
    else:
        const_val = "_ERR_CONSTVAL_"
    if type_texts and type_texts[0] is not None:
        const_type = type_texts[0].strip().lower()
    else:
        const_type = ""

    if not const_type:
        const_type = _infer_literal_type(const_val)
    return _format_literal(const_val, const_type)


def _render_local_constant(access, scope):
    """Render an <Access Scope="LocalConstant"> node as ``#Name``."""
    constants = _st_xpath(access, "Constant")
    if constants:
        const_name = constants[0].get("Name", "_ERR_CONST_NAME_")
        return f"#{const_name}"
    return f"/*_ERR_NO_SYMBOL_IN_{scope}_*/"


def _is_token(node, text):
    """Return True when *node* is a <Token> whose Text attribute equals *text*."""
    return _local_name(node) == "Token" and node.get("Text") == text


def _render_component_indices(comp, processed_uids):
    """
    Render the array index part of a <Component>, or "" when it has none.

    Two layouts are handled:
    * ``<Token "["/> ... <Token "]"/>`` children: the nodes between the
      brackets are rendered in document order. Separator/operator tokens
      found there are emitted as-is, so they must not be joined again with
      an extra comma.
    * bare ``<Access>`` children: each one is an index, joined with commas.

    Nested <Access> nodes are rendered with ``_render_access``; feeding them
    to ``reconstruct_scl_from_tokens`` would only look at their children
    (Symbol/Constant) and produce an empty index.
    """
    nested = comp.xpath("./*")

    if len(nested) >= 3 and _is_token(nested[0], "[") and _is_token(nested[-1], "]"):
        for node in nested:
            uid = node.get("UId")
            if uid:
                processed_uids.add(uid)

        pieces = []
        explicit_tokens = False
        for node in nested[1:-1]:
            kind = _local_name(node)
            if kind == "Access":
                if node.get("Scope") == "LiteralConstant":
                    # Index literals are emitted raw, without type formatting.
                    constants = _st_xpath(node, "Constant")
                    if constants:
                        values = _st_xpath(constants[0], "ConstantValue")
                        if values and values[0].text:
                            pieces.append(values[0].text.strip())
                else:
                    rendered = _render_access(node, processed_uids).strip()
                    if rendered:
                        pieces.append(rendered)
            elif kind == "Token":
                token_text = node.get("Text", "")
                if token_text.strip():
                    pieces.append(token_text)
                    explicit_tokens = True

        if not pieces:
            return ""
        glue = "" if explicit_tokens else ","
        return f"[{glue.join(pieces)}]"

    index_nodes = _st_xpath(comp, "Access")
    if not index_nodes:
        return ""
    cleaned = [
        _render_access(idx_node, processed_uids).replace("\n", "").strip()
        for idx_node in index_nodes
    ]
    return f"[{','.join(cleaned)}]"


def _access_declares_quotes(symbol):
    """Return True if the nearest enclosing <Access> has BooleanAttribute HasQuotes = true."""
    owners = symbol.xpath("ancestor::st:Access[1]", namespaces=ns)
    if not owners:
        owners = symbol.xpath("ancestor::Access[1]")
    if not owners:
        return False
    flags = _st_xpath(owners[0], "BooleanAttribute[@Name='HasQuotes']/text()")
    return bool(flags) and flags[0].lower() == "true"


def _render_variable(access, scope, processed_uids):
    """Render a variable-like <Access> (Symbol with dotted Components and optional indices)."""
    symbols = _st_xpath(access, "Symbol")
    if not symbols:
        return f"/*_ERR_NO_SYMBOL_IN_{scope}_*/"
    symbol = symbols[0]

    components = _st_xpath(symbol, "Component")
    # Every component shares the same enclosing Access, so look it up once.
    quoted_by_access = _access_declares_quotes(symbol)

    text_parts = []
    for position, comp in enumerate(components):
        name = comp.get("Name", "_ERR_COMP_")
        if position > 0:
            text_parts.append(".")

        quoted_by_comp = comp.get("HasQuotes", "false").lower() == "true"
        is_temp = name.startswith("#")
        # Quote when flagged, or for a first component that is neither
        # a '#' name nor already contains a quote character.
        if (
            quoted_by_comp
            or quoted_by_access
            or (position == 0 and not is_temp and '"' not in name)
        ):
            text_parts.append(f'"{name}"')
        else:
            text_parts.append(name)

        index_text = _render_component_indices(comp, processed_uids)
        if index_text:
            text_parts.append(index_text)

    return "".join(text_parts)


def _render_call(access):
    """Render an <Access Scope="Call"> node as a block/function call."""
    call_infos = access.xpath("./st:CallInfo", namespaces=ns)
    if not call_infos:
        return "/*_ERR_NO_CALLINFO_*/"
    ci = call_infos[0]
    call_name = ci.get("Name", "_ERR_CALLNAME_")
    call_type = ci.get("BlockType")  # FB, FC, ...

    arguments = []
    for param in ci.xpath("./st:Parameter", namespaces=ns):
        p_name = param.get("Name", "_ERR_PARAMNAME_")
        p_value = ""
        if param.xpath("./st:Access | ./st:Token", namespaces=ns):
            # A Parameter is a token container, so the entry point applies.
            p_value = reconstruct_scl_from_tokens(param).replace("\n", "").strip()
        arguments.append(f"{p_name} := {p_value}")
    joined = ", ".join(arguments)

    if call_type == "FB":
        instance = ci.xpath("./st:Instance/st:Component/@Name", namespaces=ns)
        if instance:
            return f'"{instance[0]}"({joined})'
        return f'"{call_name}"({joined}) (* FB sin instancia explícita? *)'
    if call_type == "FC":
        return f'"{call_name}"({joined})'
    return f'"{call_name}"({joined}) (* Tipo: {call_type} *)'


def _render_access(access, processed_uids):
    """Render one <Access> node according to its Scope attribute."""
    scope = access.get("Scope")
    if scope == "LocalConstant":
        return _render_local_constant(access, scope)
    if scope in _VARIABLE_SCOPES:
        return _render_variable(access, scope, processed_uids)
    if scope == "LiteralConstant":
        return _render_literal(access)
    if scope == "Call":
        return _render_call(access)
    # Scopes not handled here (e.g. Address, Label, Reference) get a placeholder.
    return f"/*_ERR_Scope_{scope}_*/"


def _render_comment(elem, tag):
    """Render a <Comment> or <LineComment> node."""
    if tag != "LineComment":
        # Classic Comment: text comes from the multilingual helper.
        return f"(* {get_multilingual_text(elem)} *)"

    text_nodes = _st_xpath(elem, "Text")
    if not (text_nodes and text_nodes[0].text):
        return "// [Comentario vacío]"
    comment_text = text_nodes[0].text.strip()
    if "\n" in comment_text:
        # A multi-line text cannot be a // comment.
        return f"(* {comment_text} *)"
    return f"// {comment_text}"


def _is_case_label(trimmed_line, line_upper):
    """
    Heuristically decide whether a line starts with a CASE label (``1:``, ``#Name:``).

    Only a colon located before the first ``:=`` counts, so plain
    assignments are not mistaken for labels.
    """
    head = trimmed_line.split(":=", 1)[0]
    if ":" not in head:
        return False
    if line_upper in ("ELSE:", "ELSIF:"):
        return False
    before_colon = head.split(":", 1)[0]
    if "//" in before_colon or "(*" in before_colon:
        return False  # the colon belongs to a comment
    return trimmed_line.startswith("#") or not any(
        keyword in line_upper for keyword in ("IF", "ELSIF", "ELSE", "THEN")
    )


def _reindent_scl(raw_scl):
    """
    Apply a simple keyword-driven re-indentation to *raw_scl*.

    Blank lines are dropped. Block openers (THEN/DO/ELSE/CASE ... OF, ...)
    indent the following lines; END_*/UNTIL/ELSE/ELSIF dedent before being
    printed. CASE labels are printed one level inside their CASE and their
    statements one level further; END_CASE returns to the level of the
    matching CASE line.
    """
    output_lines = []
    indent_level = 0
    case_levels = []  # label level of every open CASE, innermost last

    for line in raw_scl.splitlines():
        trimmed_line = line.strip()
        if not trimmed_line:
            continue
        line_upper = trimmed_line.upper()

        label_level = case_levels[-1] if case_levels else 0
        is_label = label_level > 0 and _is_case_label(trimmed_line, line_upper)

        # Dedent BEFORE printing.
        if line_upper.startswith(("END_", "UNTIL", "}")):
            if line_upper.startswith("END_CASE") and case_levels:
                # Undo both the CASE level and the label level at once.
                indent_level = max(0, case_levels.pop() - 1)
            else:
                indent_level = max(0, indent_level - 1)
        elif line_upper == "ELSE" or _ELSIF_RE.match(line_upper):
            indent_level = max(0, indent_level - 1)
        elif is_label:
            indent_level = label_level

        current_indent = label_level if is_label else indent_level
        output_lines.append(_INDENT_UNIT * current_indent + trimmed_line)

        # Indent AFTER printing.
        if line_upper.endswith(("THEN", "DO", "{")) or line_upper == "ELSE":
            indent_level += 1
        elif line_upper.startswith(
            ("IF ", "ELSIF ", "FOR ", "WHILE ", "REPEAT", "STRUCT")
        ):
            indent_level += 1
        elif line_upper.startswith("CASE ") and line_upper.endswith(" OF"):
            indent_level += 1
            case_levels.append(indent_level)
        elif is_label:
            indent_level = label_level + 1

    return "\n".join(output_lines)


def reconstruct_scl_from_tokens(st_node):
    """
    Rebuild SCL source text from a <StructuredText>-like token container.

    The direct children of *st_node* (looked up with the ``st`` namespace
    first, then without namespace) are rendered in document order:
    Token, Blank, NewLine, Access (variables, constants, literals, calls)
    and Comment/LineComment. Other node types are ignored. The resulting
    text is then re-indented with a simple keyword-based pass.

    Args:
        st_node: element exposing ``xpath``/``get`` (an lxml element), or None.

    Returns:
        The reconstructed SCL as a string. When *st_node* is None an error
        comment line is returned instead.
    """
    if st_node is None:
        return "// Error: StructuredText node not found.\n"

    nodes = _st_xpath(st_node, "*")

    # UIds of nodes consumed while rendering array indices; skipped below.
    processed_uids = set()
    scl_parts = []

    for elem in nodes:
        uid = elem.get("UId")
        if uid and uid in processed_uids:
            continue

        tag = _local_name(elem)

        if tag == "Token":
            scl_parts.append(elem.get("Text", ""))
        elif tag == "Blank":
            width = _blank_width(elem)
            if not scl_parts or not scl_parts[-1].endswith(" "):
                scl_parts.append(" " * width)
            elif width > 1:
                # One space is already there; add only the remainder.
                scl_parts.append(" " * (width - 1))
        elif tag == "NewLine":
            # Drop trailing whitespace before the line break.
            if scl_parts:
                scl_parts[-1] = scl_parts[-1].rstrip()
            scl_parts.append("\n")
        elif tag == "Access":
            scl_parts.append(_render_access(elem, processed_uids))
        elif tag in ("Comment", "LineComment"):
            scl_parts.append(_render_comment(elem, tag))
        # Any other node type is not relevant for the SCL text.

    return _reindent_scl("".join(scl_parts))


def parse_scl_network(network_element):
    """
    Build the JSON-ready dictionary describing one SCL network.

    The title and comment are read from the network's ObjectList
    MultilingualText entries; the SCL text is rebuilt from the first
    StructuredText node found under the first NetworkSource node
    (matched by local name, so the namespace does not matter).

    Returns:
        dict with keys ``id``, ``title``, ``comment``, ``language`` and
        ``logic`` (a single RAW_SCL_CHUNK entry holding the SCL text).
    """

    def _object_list_text(composition_name, fallback):
        # Text of the MultilingualText with this CompositionName, or the fallback.
        hits = network_element.xpath(
            f"./ObjectList/MultilingualText[@CompositionName='{composition_name}']",
            namespaces=ns,
        )
        if hits:
            return get_multilingual_text(hits[0])
        return fallback

    net_id = network_element.get("ID", "UnknownSCL_ID")
    title = _object_list_text("Title", f"Network {net_id}")
    comment = _object_list_text("Comment", "")

    # Default text, kept when no StructuredText node can be located.
    scl_source = "// SCL extraction failed: StructuredText node not found.\n"
    source_hits = network_element.xpath(".//*[local-name()='NetworkSource']")
    if source_hits:
        st_hits = source_hits[0].xpath(".//*[local-name()='StructuredText']")
    else:
        st_hits = []
    if st_hits:
        scl_source = reconstruct_scl_from_tokens(st_hits[0])

    scl_chunk = {
        "instruction_uid": f"SCL_{net_id}",
        "type": "RAW_SCL_CHUNK",
        "scl": scl_source,
    }
    return {
        "id": net_id,
        "title": title,
        "comment": comment,
        "language": "SCL",  # this parser only handles SCL networks
        "logic": [scl_chunk],
    }


# --- Parser information function ---
def get_parser_info():
    """Return this parser's descriptor: supported languages and the parse function."""
    supported_languages = ["SCL"]
    return dict(
        language=supported_languages,
        parser_func=parse_scl_network,
    )