# Simatic_XML_Parser_to_SCL/ToUpload/x1_to_json.py

# ToUpload/x1_to_json.py
# -*- coding: utf-8 -*-
import json
import argparse
import os
import sys
import traceback
import importlib
from lxml import etree
from collections import defaultdict  # Puede ser necesario si load_parsers la usa
import copy  # Puede ser necesario si load_parsers la usa

# Import the shared helpers and the XPath namespace map from the parser utilities module.
# The rest of this script relies on these names, so a failed import is reported and the
# process exits with status 1.
try:
    from parsers.parser_utils import ns, get_multilingual_text, parse_interface_members
except ImportError as e:
    print(
        f"Error crítico: No se pudieron importar funciones desde parsers.parser_utils: {e}"
    )
    print(
        "Asegúrate de que el directorio 'parsers' y 'parsers/parser_utils.py' existen y son correctos."
    )
    sys.exit(1)


# --- Dynamic parser loader ---
def load_parsers(parsers_dir="parsers"):
    """Discover parser modules and build a language -> parser function map.

    Every ``parse_*.py`` file found in *parsers_dir* is imported as a
    submodule of the package named after that directory. A module takes part
    when it exposes a callable ``get_parser_info()`` returning a dict with:

    * ``"language"``: a list of language names handled by the module
    * ``"parser_func"``: the callable that parses one network element

    Args:
        parsers_dir: Directory holding the parser modules. A relative path is
            resolved against the directory of this script; an absolute path
            is used as given.

    Returns:
        dict mapping the upper-cased language name to its parser function.
        The dict is empty when the directory does not exist or no module
        provided a valid description. Modules are processed in sorted file
        name order, so when two modules claim the same language the one that
        sorts last wins (a warning is printed).
    """
    parser_map = {}
    script_dir = os.path.dirname(__file__)
    parsers_dir_path = os.path.join(script_dir, parsers_dir)
    if not os.path.isdir(parsers_dir_path):
        print(f"Error: Directorio de parsers no encontrado: '{parsers_dir_path}'")
        return parser_map  # Nothing to load.

    print(f"Cargando parsers desde: '{parsers_dir_path}'")
    # normpath() drops a trailing separator, which would otherwise make
    # basename() return an empty package name.
    parsers_package = os.path.basename(os.path.normpath(parsers_dir))

    # import_module() can only resolve "<package>.<module>" when the directory
    # that CONTAINS the package is importable. Appending (not inserting) keeps
    # every import that already resolved resolving to the same module.
    import_root = os.path.dirname(os.path.abspath(parsers_dir_path))
    if import_root not in sys.path:
        sys.path.append(import_root)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # "last definition wins" rule reproducible across machines and runs.
    for filename in sorted(os.listdir(parsers_dir_path)):
        # Only 'parse_*.py' files are parser modules. '__init__.py' and
        # 'parser_utils.py' never match this prefix, so they are skipped too.
        if not (filename.startswith("parse_") and filename.endswith(".py")):
            continue

        module_name_rel = filename[:-3]  # File name without '.py'.
        full_module_name = f"{parsers_package}.{module_name_rel}"
        try:
            module = importlib.import_module(full_module_name)

            get_info = getattr(module, "get_parser_info", None)
            if not callable(get_info):
                print(
                    f"  Advertencia: Módulo {module_name_rel}.py no tiene la función 'get_parser_info'."
                )
                continue

            parser_info = get_info()
            if not (
                isinstance(parser_info, dict)
                and "language" in parser_info
                and "parser_func" in parser_info
            ):
                print(
                    f"  Advertencia: get_parser_info en {full_module_name} no devolvió el diccionario esperado."
                )
                continue

            languages = parser_info["language"]
            parser_func = parser_info["parser_func"]
            if not (isinstance(languages, list) and callable(parser_func)):
                print(
                    f"  Advertencia: Formato inválido en get_parser_info de {full_module_name} (language debe ser lista, parser_func callable)."
                )
                continue

            # Register the function once per supported language, keyed in
            # upper case so lookups are case-insensitive.
            for lang in languages:
                lang_upper = lang.upper()
                if lang_upper in parser_map:
                    print(
                        f"  Advertencia: Parser para '{lang_upper}' en {full_module_name} sobrescribe definición anterior."
                    )
                parser_map[lang_upper] = parser_func
                print(
                    f"  - Cargado parser para '{lang_upper}' desde {module_name_rel}.py"
                )

        except ImportError as e:
            print(f"Error importando {full_module_name}: {e}")
        except Exception as e:
            # A broken parser module must not prevent the others from loading.
            print(f"Error procesando {full_module_name}: {e}")
            traceback.print_exc()

    print(f"\nTotal de lenguajes con parser cargado: {len(parser_map)}")
    print(f"Lenguajes soportados: {list(parser_map.keys())}")
    return parser_map


# --- Main conversion function ---
# Local tag name of each supported block element -> short block type.
_BLOCK_TYPES_BY_TAG = {
    "SW.Blocks.FC": "FC",
    "SW.Blocks.FB": "FB",
    "SW.Blocks.GlobalDB": "GlobalDB",
    "SW.Blocks.OB": "OB",
}


def _display_path(path):
    """Return *path* relative to the current directory, for log output only."""
    try:
        return os.path.relpath(path)
    except ValueError:
        # On Windows relpath() raises when the path is on a different drive
        # than the current directory; logging must not fail because of that.
        return path


def _locate_block(root):
    """Find the block element; return ``(element, block_type)`` or ``(None, None)``."""
    print("Paso 2: Buscando el bloque SW.Blocks.FC/FB/GlobalDB/OB...")
    # local-name() matches the element whatever namespace it is in, so no
    # additional namespaced search is needed.
    block_list = root.xpath(
        "//*[local-name()='SW.Blocks.FC' or local-name()='SW.Blocks.FB' or local-name()='SW.Blocks.GlobalDB' or local-name()='SW.Blocks.OB']"
    )
    if block_list:
        the_block = block_list[0]
        block_tag_name = etree.QName(the_block.tag).localname
        block_type_found = _BLOCK_TYPES_BY_TAG.get(block_tag_name)
        print(
            f"Paso 2: Bloque {block_tag_name} (Tipo: {block_type_found}) encontrado (ID={the_block.get('ID')})."
        )
        return the_block, block_type_found

    print(
        "Error Crítico: No se encontró el elemento raíz del bloque (<SW.Blocks.FC/FB/GlobalDB/OB>)."
    )
    # Fallback: accept any SW.Blocks.* element and mark its type as unknown.
    any_block = root.xpath("//*[starts-with(local-name(), 'SW.Blocks.')]")
    if not any_block:
        return None, None
    print(
        f"Advertencia: Se encontró un bloque genérico: {etree.QName(any_block[0].tag).localname}. Intentando continuar..."
    )
    return any_block[0], "Unknown"


def _read_block_attributes(the_block, block_type_found):
    """Read name, number and language; return ``(attr_list_or_None, name, number, language)``."""
    print("Paso 3: Extrayendo atributos del bloque...")
    block_name_val, block_number_val, block_lang_val = "Unknown", None, "Unknown"

    attribute_list_node = the_block.xpath("./AttributeList")
    if not attribute_list_node:
        print(
            f"Advertencia: No se encontró AttributeList para el bloque {block_type_found}."
        )
        if block_type_found == "GlobalDB":
            block_lang_val = "DB"  # A GlobalDB is reported with language "DB".
        return None, block_name_val, block_number_val, block_lang_val

    attr_list = attribute_list_node[0]
    name_node = attr_list.xpath("./Name/text()")
    if name_node:
        block_name_val = name_node[0].strip()

    num_node = attr_list.xpath("./Number/text()")
    try:
        block_number_val = int(num_node[0]) if num_node else None
    except (ValueError, TypeError):
        block_number_val = None  # Keep None when the number is not an integer.

    lang_node = attr_list.xpath("./ProgrammingLanguage/text()")
    if lang_node:
        block_lang_val = lang_node[0].strip()
    elif block_type_found == "GlobalDB":
        block_lang_val = "DB"
    else:
        block_lang_val = "Unknown"

    print(
        f"Paso 3: Atributos: Nombre='{block_name_val}', Número={block_number_val}, Lenguaje Bloque='{block_lang_val}'"
    )
    return attr_list, block_name_val, block_number_val, block_lang_val


def _read_block_comment(the_block):
    """Return the block comment text, or ``""`` when no comment element exists."""
    comment_node_list = the_block.xpath(
        "./ObjectList/MultilingualText[@CompositionName='Comment']"
    )
    if comment_node_list:
        block_comment_val = get_multilingual_text(comment_node_list[0])
        print(f"Paso 3b: Comentario bloque: '{block_comment_val[:50]}...'")
        return block_comment_val

    # Fallback: a Comment element directly under AttributeList.
    comment_attr_node = the_block.xpath("./AttributeList/Comment")
    if comment_attr_node:
        block_comment_val = get_multilingual_text(comment_attr_node[0])
        print(
            f"Paso 3b (Fallback): Comentario bloque encontrado en AttributeList: '{block_comment_val[:50]}...'"
        )
        return block_comment_val
    return ""


def _extract_interface(the_block, attr_list, block_type_found):
    """Return a dict mapping each interface section name to its parsed members."""
    print("Paso 4: Extrayendo la interfaz del bloque...")
    interface = {}
    # Compare with None explicitly: the truth value of an lxml element depends
    # on whether it has children, not on whether it exists.
    interface_node_list = (
        attr_list.xpath("./Interface") if attr_list is not None else []
    )

    if interface_node_list:
        print("Paso 4: Nodo Interface encontrado.")
        all_sections = interface_node_list[0].xpath(".//iface:Section", namespaces=ns)
        if all_sections:
            for section in all_sections:
                section_name = section.get("Name")
                # Skip unnamed sections and names that were already stored
                # (only the first section with members is kept per name).
                if not section_name or section_name in interface:
                    continue
                members_in_section = section.xpath("./iface:Member", namespaces=ns)
                if members_in_section:
                    interface[section_name] = parse_interface_members(
                        members_in_section
                    )
        else:
            print("Advertencia: Nodo Interface no contiene secciones <iface:Section>.")

        if not interface:
            print("Advertencia: Interface encontrada pero sin secciones procesables.")
    elif block_type_found == "GlobalDB":
        # A GlobalDB without an <Interface> node: look for the Static section
        # anywhere below the block.
        static_members = the_block.xpath(
            ".//iface:Section[@Name='Static']/iface:Member", namespaces=ns
        )
        if static_members:
            print(
                "Paso 4: Encontrada sección Static para GlobalDB (sin nodo Interface)."
            )
            interface["Static"] = parse_interface_members(static_members)
        else:
            print("Advertencia: No se encontró sección 'Static' para GlobalDB.")
    else:
        print(
            f"Advertencia: No se encontró <Interface> para bloque {block_type_found}."
        )

    if not interface:
        print("Advertencia: No se pudo extraer información de la interfaz.")
    return interface


def _parse_single_network(network_elem, network_id, parser_map):
    """Parse one CompileUnit with the parser registered for its language."""
    # The network language may differ from the block language; "LAD" is the
    # default when the CompileUnit does not declare one.
    network_lang = "LAD"
    net_attr_list = network_elem.xpath("./AttributeList")
    if net_attr_list:
        lang_node = net_attr_list[0].xpath("./ProgrammingLanguage/text()")
        if lang_node:
            network_lang = lang_node[0].strip()

    print(f"  - Procesando Red ID={network_id}, Lenguaje Red={network_lang}")

    parser_func = parser_map.get(network_lang.upper())
    if parser_func:
        try:
            parsed_network_data = parser_func(network_elem)
        except Exception as e_parse:
            print(
                f"    ERROR durante el parseo de Red {network_id} ({network_lang}): {e_parse}"
            )
            traceback.print_exc()
            # Keep the network in the output, flagged with the parser error.
            parsed_network_data = {
                "id": network_id,
                "language": network_lang,
                "logic": [],
                "error": f"Parser failed: {e_parse}",
            }
    else:
        print(
            f"    Advertencia: Lenguaje de red '{network_lang}' no soportado por los parsers cargados."
        )
        parsed_network_data = {
            "id": network_id,
            "language": network_lang,
            "logic": [],
            "error": f"Unsupported language: {network_lang}",
        }

    # A parser that returns an empty/None result makes the network be omitted.
    if not parsed_network_data:
        return parsed_network_data

    title_element = network_elem.xpath(
        ".//iface:MultilingualText[@CompositionName='Title']",
        namespaces=ns,
    )
    if not title_element:
        # The comment lookups in this file find MultilingualText under
        # ObjectList WITHOUT a namespace; try the title the same way so it is
        # not silently replaced by the generic "Network <id>" label.
        title_element = network_elem.xpath(
            "./ObjectList/MultilingualText[@CompositionName='Title']"
        )
    parsed_network_data["title"] = (
        get_multilingual_text(title_element[0])
        if title_element
        else f"Network {network_id}"
    )

    comment_elem_net = network_elem.xpath(
        "./ObjectList/MultilingualText[@CompositionName='Comment']",
        namespaces=ns,
    )
    if not comment_elem_net:  # Fallback: search all descendants.
        comment_elem_net = network_elem.xpath(
            ".//MultilingualText[@CompositionName='Comment']",
            namespaces=ns,
        )
    parsed_network_data["comment"] = (
        get_multilingual_text(comment_elem_net[0]) if comment_elem_net else ""
    )
    return parsed_network_data


def _parse_networks(the_block, block_type_found, parser_map):
    """Return the list of parsed networks (CompileUnits) of the block."""
    print("Paso 5: Buscando y PROCESANDO redes (CompileUnits)...")
    networks = []
    object_list_node = the_block.xpath("./ObjectList")
    if not object_list_node:
        if block_type_found == "GlobalDB":
            print("Paso 5: Saltando búsqueda de CompileUnits para GlobalDB (esperado).")
        else:
            print(
                f"Advertencia: No se encontró ObjectList para el bloque {block_type_found}."
            )
        return networks

    compile_units = object_list_node[0].xpath("./SW.Blocks.CompileUnit")
    print(
        f"Paso 5: Se encontraron {len(compile_units)} elementos SW.Blocks.CompileUnit."
    )
    for network_elem in compile_units:
        network_id = network_elem.get("ID")
        if not network_id:
            print("Advertencia: CompileUnit sin ID, saltando.")
            continue
        parsed_network_data = _parse_single_network(
            network_elem, network_id, parser_map
        )
        if parsed_network_data:
            networks.append(parsed_network_data)

    if not compile_units and block_type_found != "GlobalDB":
        print(
            f"Advertencia: ObjectList para {block_type_found} sin SW.Blocks.CompileUnit."
        )
    return networks


def _write_json(result, json_filepath):
    """Serialize *result* and write it to *json_filepath*; return True on success."""
    # Serialize BEFORE opening the file so that a serialization failure cannot
    # leave a truncated JSON file behind.
    try:
        payload = json.dumps(result, indent=4, ensure_ascii=False)
    except TypeError as e:
        print(
            f"Error Crítico: Problema al serializar a JSON (posiblemente datos no serializables). Error: {e}"
        )
        return False

    try:
        with open(json_filepath, "w", encoding="utf-8") as f:
            f.write(payload)
    except OSError as e:
        print(
            f"Error Crítico: No se pudo escribir JSON en '{json_filepath}'. Error: {e}"
        )
        return False

    print("Paso 6: Escritura JSON completada.")
    print(
        f"Conversión finalizada. JSON guardado en: '{_display_path(json_filepath)}'"
    )
    return True


def convert_xml_to_json(xml_filepath, json_filepath, parser_map):
    """Convert one block XML file into a simplified JSON file.

    The XML is parsed with lxml, the first SW.Blocks.FC/FB/GlobalDB/OB element
    is located (falling back to any ``SW.Blocks.*`` element), and its
    attributes, comment, interface sections and networks are collected into a
    dict that is written to *json_filepath* as UTF-8 JSON.

    Args:
        xml_filepath: Path of the XML file to read.
        json_filepath: Path of the JSON file to write.
        parser_map: dict mapping an upper-cased network language to the
            function that parses a CompileUnit element of that language.
            Networks whose language has no parser are kept with an
            ``"error"`` entry.

    Returns:
        True when the JSON file was written, False on any failure (missing
        input file, invalid XML, no block element, serialization or write
        error, or any unexpected exception). Progress and errors are printed.
    """
    print(f"Iniciando conversión de '{xml_filepath}' a '{json_filepath}'...")
    if not os.path.exists(xml_filepath):
        print(f"Error Crítico: Archivo XML no encontrado: '{xml_filepath}'")
        return False

    try:
        print("Paso 1: Parseando archivo XML...")
        # remove_blank_text drops whitespace-only text nodes while parsing.
        parser = etree.XMLParser(remove_blank_text=True)
        tree = etree.parse(xml_filepath, parser)
        root = tree.getroot()
        print("Paso 1: Parseo XML completado.")

        the_block, block_type_found = _locate_block(root)
        if the_block is None:
            return False  # No block element at all.

        attr_list, block_name_val, block_number_val, block_lang_val = (
            _read_block_attributes(the_block, block_type_found)
        )
        block_comment_val = _read_block_comment(the_block)

        result = {
            "block_name": block_name_val,
            "block_number": block_number_val,
            "language": block_lang_val,  # Language declared for the whole block.
            "block_type": block_type_found,
            "block_comment": block_comment_val,
            "interface": {},
            "networks": [],
        }
        result["interface"] = _extract_interface(
            the_block, attr_list, block_type_found
        )
        result["networks"] = _parse_networks(the_block, block_type_found, parser_map)

        print("Paso 6: Escribiendo el resultado en el archivo JSON...")
        if not result["interface"]:
            print("ADVERTENCIA FINAL: 'interface' está vacía en el JSON.")
        if not result["networks"] and block_type_found != "GlobalDB":
            print("ADVERTENCIA FINAL: 'networks' está vacía en el JSON.")

        return _write_json(result, json_filepath)

    except etree.XMLSyntaxError as e:
        print(
            f"Error Crítico: Sintaxis XML inválida en '{xml_filepath}'. Detalles: {e}"
        )
        return False
    except Exception as e:
        # Top-level boundary: report anything unexpected and signal failure.
        print(f"Error Crítico: Error inesperado durante la conversión: {e}")
        traceback.print_exc()
        return False


# --- Main entry point (__main__) ---
# Usage: x1_to_json.py <xml_filepath>
# Writes '<xml name>_simplified.json' next to the input file and exits with
# status 0 on success or 1 on any failure.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Convert Simatic XML (LAD/FBD/SCL/STL/OB/DB) to simplified JSON using dynamic parsers."
    )
    parser.add_argument(
        "xml_filepath",
        help="Path to the input XML file passed from the main script (x0_main.py).",
    )
    args = parser.parse_args()
    xml_input_file = args.xml_filepath

    if not os.path.exists(xml_input_file):
        print(
            f"Error Crítico (x1): Archivo XML no encontrado: '{xml_input_file}'",
            file=sys.stderr,
        )
        sys.exit(1)

    # --- Load the language parsers dynamically ---
    loaded_parsers = load_parsers()
    if not loaded_parsers:
        print("Error Crítico (x1): No se cargaron parsers. Abortando.", file=sys.stderr)
        sys.exit(1)

    # Derive the JSON output path: same directory, '<name>_simplified.json'.
    xml_filename_base = os.path.splitext(os.path.basename(xml_input_file))[0]
    output_dir = os.path.dirname(xml_input_file)
    # dirname() is '' for a bare file name, and os.makedirs('') raises
    # FileNotFoundError; the current directory needs no creation anyway.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    json_output_file = os.path.join(output_dir, f"{xml_filename_base}_simplified.json")

    # Relative paths are used for display only. On Windows relpath() raises
    # ValueError when the file is on another drive than the current directory;
    # fall back to the paths as given instead of aborting.
    try:
        xml_display = os.path.relpath(xml_input_file)
        json_display = os.path.relpath(json_output_file)
    except ValueError:
        xml_display, json_display = xml_input_file, json_output_file

    print(f"(x1) Convirtiendo: '{xml_display}' -> '{json_display}'")

    # Run the conversion and map its result to the process exit status.
    success = convert_xml_to_json(xml_input_file, json_output_file, loaded_parsers)

    if success:
        sys.exit(0)  # Success
    else:
        print(
            f"\nError durante la conversión de '{xml_display}'.",
            file=sys.stderr,
        )
        sys.exit(1)  # Failure