ParamManagerScripts/backend/script_groups/XML Parser to SCL/generators/generator_utils.py

# ToUpload/generators/generator_utils.py
# -*- coding: utf-8 -*-
import re
import os
import json
import traceback # Para depuración si es necesario
import sys

# --- Import format_variable_name from processors ---
try:
    # This module is expected to live in '<project>/generators', with
    # 'processors' as a sibling directory under the same project root.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    project_base_dir = os.path.dirname(current_dir)
    processors_dir = os.path.join(project_base_dir, 'processors')
    if processors_dir not in sys.path:
        sys.path.insert(0, processors_dir)  # Kept for backward compatibility
    # The import below is package-qualified ('processors.<module>'), so the
    # directory that CONTAINS 'processors' must be importable as well; having
    # only 'processors' itself on sys.path cannot resolve it. Appended (not
    # inserted) so that it never shadows modules that already resolve.
    if project_base_dir not in sys.path:
        sys.path.append(project_base_dir)
    from processors.processor_utils import format_variable_name
except ImportError:
    print("Advertencia: No se pudo importar 'format_variable_name' desde processors.processor_utils.")
    print("Usando una implementación local básica.")

    def format_variable_name(name):
        """Basic local fallback used when the processors helper is unavailable.

        Args:
            name: Raw variable name (string, may be empty or None).

        Returns:
            "_INVALID_NAME_" for an empty/None name; the name unchanged when
            it is already wrapped in double quotes; otherwise the name with
            every character outside [a-zA-Z0-9_] replaced by "_", prefixed
            with "_" when it starts with a digit. A leading "#" is preserved
            and excluded from the sanitising.
        """
        if not name:
            return "_INVALID_NAME_"
        # Already quoted: returned untouched.
        if name.startswith('"') and name.endswith('"'):
            return name
        prefix = "#" if name.startswith("#") else ""
        if prefix:
            name = name[1:]
        # Guard against a leading digit before sanitising the characters.
        if name and name[0].isdigit():
            name = "_" + name
        name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
        return prefix + name
# --- End of fallback ---

# --- format_scl_start_value ---
# Bare identifier: accepted everywhere as a symbolic constant.
_SCL_SYMBOLIC_CONSTANT = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
# String[N] / WString[N]: the length suffix is ignored when classifying the type.
_SCL_SIZED_STRING = re.compile(r'^(w?string)\s*\[\s*\d+\s*\]$')
# Based integer literal such as 16#FF, 2#1010 or 8#17.
_SCL_BASED_LITERAL = re.compile(r'^(?:2|8|16)#[0-9a-fA-F_]+$')
_SCL_TIME_PREFIX = re.compile(r"^(S5TIME#|LTIME#|TIME#|S5T#|LT#|T#)(.*)", re.IGNORECASE)
# Duration body: units in descending order, underscores between units are
# optional, and at least one digit is required (the lookahead rejects "").
# "m(?!s)" keeps the minutes group from swallowing the "m" of "ms".
_SCL_TIME_VALUE = re.compile(
    r'^-?(?=\d)(\d+d_?)?(\d+h_?)?(\d+m(?!s)_?)?(\d+s_?)?(\d+ms_?)?(\d+us_?)?(\d+ns)?$',
    re.IGNORECASE,
)
_SCL_DATE_PREFIX = re.compile(
    r"^(DATE_AND_TIME#|TIME_OF_DAY#|DATE#|DTL#|DT#|TOD#|D#)(.*)", re.IGNORECASE
)
_SCL_DATE_VALUE = re.compile(r'^\d{4}-\d{2}-\d{2}(-\d{2}:\d{2}:\d{2}(\.\d+)?)?$')
_SCL_CLOCK_VALUE = re.compile(r'^\d{2}:\d{2}:\d{2}(\.\d+)?$')

_SCL_INTEGER_TYPES = frozenset({
    "int", "dint", "sint", "usint", "uint", "udint", "lint", "ulint",
    "byte", "word", "dword", "lword",
})
_SCL_TEXT_TYPES = frozenset({"string", "wstring", "char", "wchar"})
_SCL_DATE_TYPES = frozenset({"date", "dt", "dtl", "tod", "time_of_day", "date_and_time"})
# Everything that is NOT in this set is handled as a complex type.
_SCL_START_VALUE_BASIC_TYPES = (
    _SCL_INTEGER_TYPES | _SCL_TEXT_TYPES | _SCL_DATE_TYPES | frozenset({
        "bool", "real", "lreal", "time", "ltime", "s5time", "variant",
        "timer", "counter", "iec_timer", "iec_counter", "iec_sfc", "iec_ld_timer",
    })
)


def format_scl_start_value(value, datatype):
    """Format a raw start value as an SCL initialiser for the given datatype.

    Args:
        value: Raw start value. May be None, a scalar, or a dict; for a dict
            the entry under 'value' is used.
        datatype: Declared type name (case-insensitive). Non-string datatypes
            are treated as an unknown, complex type.

    Returns:
        The text to place after ":=" in a declaration, or None when no
        initialiser should be emitted. A dict without a 'value' entry is
        returned as a "/* Init: ... */" comment containing its JSON dump.

    Behaviour by type:
        * Complex types (quoted names, arrays, structs, anything that is not
          a known basic type): only a symbolic constant, 0, FALSE or ''.
        * Integer / bit-string types: decimal integers, based literals
          (2#, 8#, 16#) or a symbolic constant.
        * Bool: TRUE/FALSE (1/0 accepted); unparseable values become FALSE.
        * String / Char types, including String[N]: single-quoted text with
          embedded single quotes doubled, WSTRING#/WCHAR# prefixed as needed.
        * Real: 7 significant digits; LReal: shortest round-trip text.
        * Time types: validated duration re-prefixed with T#, LT# or S5T#.
        * Date types: validated value re-prefixed with D#, DT#, DTL# or TOD#.
    """
    if value is None:
        return None
    if isinstance(value, dict):
        inner_value = value.get('value')
        if inner_value is None:
            # Nothing usable inside: keep the information as a comment.
            return f"/* Init: {json.dumps(value)} */"
        value = inner_value

    datatype_lower = datatype.strip().lower() if isinstance(datatype, str) else ""
    value_str = str(value)

    # String[20] must be classified exactly like String; otherwise it falls
    # into the complex branch and its start value is lost or left unquoted.
    sized_string = _SCL_SIZED_STRING.match(datatype_lower)
    base_type = sized_string.group(1) if sized_string else datatype_lower

    is_complex_type = (
        '"' in datatype_lower
        or 'array' in datatype_lower
        or 'struct' in datatype_lower
        or base_type not in _SCL_START_VALUE_BASIC_TYPES
    )
    if is_complex_type:
        # Only trivially safe initialisers are allowed for complex types.
        if _SCL_SYMBOLIC_CONSTANT.match(value_str):
            return value_str
        if value_str == '0':
            return '0'
        if value_str.lower() == 'false':
            return 'FALSE'
        if value_str == "''" or value_str == "":
            return "''"
        return None

    # Drop one pair of matching outer quotes (single or double), if present.
    unquoted = value_str
    if len(value_str) > 1:
        if value_str.startswith('"') and value_str.endswith('"'):
            unquoted = value_str[1:-1]
        elif value_str.startswith("'") and value_str.endswith("'"):
            unquoted = value_str[1:-1]
    symbolic_or_none = unquoted if _SCL_SYMBOLIC_CONSTANT.match(unquoted) else None

    if base_type in _SCL_INTEGER_TYPES:
        try:
            return str(int(unquoted))
        except ValueError:
            # Based literals (e.g. 16#FF) are passed through untouched.
            if _SCL_BASED_LITERAL.match(unquoted):
                return unquoted
            return symbolic_or_none

    if base_type == "bool":
        lowered = unquoted.lower()
        if lowered in ('true', '1'):
            return "TRUE"
        if lowered in ('false', '0'):
            return "FALSE"
        return symbolic_or_none if symbolic_or_none is not None else "FALSE"

    if base_type in _SCL_TEXT_TYPES:
        escaped_value = unquoted.replace("'", "''")
        if base_type == "wstring":
            prefix = "WSTRING#"
        elif base_type == "wchar":
            prefix = "WCHAR#"
        else:
            prefix = ""
        return f"{prefix}'{escaped_value}'"

    if base_type in ("real", "lreal"):
        try:
            f_val = float(unquoted)
        except ValueError:
            return symbolic_or_none
        # float() also parses "inf"/"nan"/"infinity"; those are not numeric
        # literals here, so they are only kept as symbolic names.
        if f_val != f_val or f_val in (float("inf"), float("-inf")):
            return symbolic_or_none
        # LREAL is emitted with the shortest text that round-trips the float
        # so no precision is lost; REAL keeps 7 significant digits.
        s_val = repr(f_val) if base_type == "lreal" else "{:.7g}".format(f_val)
        if "." not in s_val and "e" not in s_val.lower():
            s_val += ".0"  # Make integral values read as real literals
        return s_val

    # Date-like types are tested before the "time" substring check below
    # because "time_of_day" and "date_and_time" contain "time".
    if base_type in _SCL_DATE_TYPES:
        val_to_use = unquoted
        prefix_match = _SCL_DATE_PREFIX.match(val_to_use)
        if prefix_match:
            val_to_use = prefix_match.group(2)
        if base_type == "dtl":
            target_prefix = "DTL#"
        elif base_type in ("dt", "date_and_time"):
            target_prefix = "DT#"
        elif base_type in ("tod", "time_of_day"):
            target_prefix = "TOD#"
        else:
            target_prefix = "D#"
        if _SCL_DATE_VALUE.match(val_to_use) or _SCL_CLOCK_VALUE.match(val_to_use):
            return f"{target_prefix}{val_to_use}"
        return symbolic_or_none

    if "time" in base_type:  # time, ltime, s5time (and the timer types)
        val_to_use = unquoted
        prefix_match = _SCL_TIME_PREFIX.match(val_to_use)
        if prefix_match:
            val_to_use = prefix_match.group(2)
        if _SCL_TIME_VALUE.match(val_to_use):
            if "s5time" in base_type:
                target_prefix = "S5T#"
            elif "ltime" in base_type:
                target_prefix = "LT#"
            else:
                target_prefix = "T#"
            return f"{target_prefix}{val_to_use}"
        return symbolic_or_none

    # Remaining known types (variant, counter, ...): symbolic constants only.
    return value_str if _SCL_SYMBOLIC_CONSTANT.match(value_str) else None


# --- generate_scl_declarations ---
# Simple (non-parameterised) basic types, lower-case. Names outside this set
# that are not String/WString[N] are assumed to be UDT or FB types.
_DECL_BASIC_TYPES = frozenset({
    "bool", "int", "dint", "sint", "usint", "uint", "udint", "lint", "ulint",
    "byte", "word", "dword", "lword", "real", "lreal", "time", "ltime",
    "s5time", "date", "dt", "dtl", "tod", "time_of_day",
    "char", "wchar", "variant",
    "timer", "counter", "iec_timer", "iec_counter", "iec_sfc", "iec_ld_timer",
})
_DECL_STRING_PATTERN = re.compile(r"^(W?STRING)(\[\s*\d+\s*\])?$", re.IGNORECASE)
_DECL_ARRAY_PATTERN = re.compile(r'^(Array\[.*\]\s+of\s+)(.*)', re.IGNORECASE)


def _decl_strip_quotes(type_name):
    """Return type_name without one surrounding pair of double quotes."""
    if type_name.startswith('"') and type_name.endswith('"'):
        return type_name[1:-1]
    return type_name


def _decl_is_user_type(clean_type_name):
    """Return True when the name is neither a basic type nor String/WString[N]."""
    return (clean_type_name.lower() not in _DECL_BASIC_TYPES
            and not _DECL_STRING_PATTERN.match(clean_type_name))


def _decl_inline_comment(comment):
    """Return ' // <comment>' on a single line, or '' when there is no comment."""
    if not comment:
        return ""
    # A '//' comment ends at the line break; anything after it would be
    # emitted as code, so line breaks are folded into spaces.
    return " // " + re.sub(r'[\r\n]+', ' ', str(comment))


def _decl_resolve_type(var_dtype_raw, instance_of_name):
    """Work out how a non-struct variable is declared.

    Returns a tuple (declaration_dtype, base_type_for_init, is_array,
    type_to_check) where type_to_check is the clean UDT/FB name whose
    definition file should be looked up, or None for basic types.
    """
    if not isinstance(var_dtype_raw, str):
        return var_dtype_raw, var_dtype_raw, False, None

    # 1. FB instance: the instance type wins and is always quoted.
    if instance_of_name:
        return f'"{instance_of_name}"', instance_of_name, False, instance_of_name

    # 2. Array: the element type decides whether quoting is needed.
    array_match = _DECL_ARRAY_PATTERN.match(var_dtype_raw)
    if array_match:
        array_prefix = array_match.group(1)
        element_raw = array_match.group(2).strip()
        element_clean = _decl_strip_quotes(element_raw)
        if _decl_is_user_type(element_clean):
            return f'{array_prefix}"{element_clean}"', element_raw, True, element_clean
        return f'{array_prefix}{element_raw}', element_raw, True, None

    # 3. Plain type: UDT/FB names are quoted, basic types are used as given.
    clean_type = _decl_strip_quotes(var_dtype_raw)
    if _decl_is_user_type(clean_type):
        return f'"{clean_type}"', clean_type, False, clean_type
    return var_dtype_raw, clean_type, False, None


def _decl_report_type_definition(type_to_check, project_root_dir):
    """Print whether a '<type>_processed.json' definition exists for the type."""
    type_scl_name = format_variable_name(type_to_check)
    possible_paths = [
        os.path.join(project_root_dir, 'PLC data types', 'parsing', f'{type_scl_name}_processed.json'),
        os.path.join(project_root_dir, 'Program blocks', 'parsing', f'{type_scl_name}_processed.json'),
    ]
    found_path = next((path for path in possible_paths if os.path.exists(path)), None)
    if found_path:
        print(f"    INFO: Definición '{type_to_check}' localizada en: '{os.path.relpath(found_path, project_root_dir)}'")
    else:
        print(f"    WARNING: No se encontró definición para '{type_to_check}'. Se buscó en directorios estándar.")


def _decl_array_initializer(array_elements, element_type, var_name_scl):
    """Build the '[v0, v1, ...]' initialiser text from an index -> value mapping."""
    try:
        # Sort the ORIGINAL keys numerically; converting them to int and back
        # to str would break the lookup for int keys or keys such as "01".
        ordered_keys = sorted(array_elements, key=int)
    except (ValueError, TypeError):
        print(f"Advertencia: Índices array no numéricos para '{var_name_scl}', ordenando como strings.")
        ordered_keys = sorted(array_elements, key=str)

    init_values = []
    for key in ordered_keys:
        # Each element may be a dict or a direct value; both are accepted
        # by format_scl_start_value.
        formatted = format_scl_start_value(array_elements[key], element_type)
        if formatted is None:
            formatted = f"/* Array[{key}] unsupported init */"
        init_values.append(formatted)
    return f"[{', '.join(init_values)}]" if init_values else None


def generate_scl_declarations(variables, indent_level=1, project_root_dir=None):
    """Generate SCL declaration lines for a list of variable descriptions.

    Handles UDTs, FB instances ('instance_of_name'), arrays and inline
    structs (variables that carry 'children'), recursing for nested structs.

    Args:
        variables: Iterable of dicts read with the keys 'name', 'datatype',
            'instance_of_name', 'comment', 'start_value', 'children' and
            'array_elements'. None is treated as an empty list.
        indent_level: Nesting depth; each level indents by two spaces.
        project_root_dir: Optional project root. When given, the existence of
            a '<type>_processed.json' definition is reported via print for
            every UDT/FB type encountered (informational only).

    Returns:
        List of SCL source lines (without line terminators).
    """
    scl_lines = []
    indent = "  " * indent_level

    for var in variables or []:
        var_name_scl = format_variable_name(var.get("name"))
        # A missing, None or empty datatype falls back to VARIANT.
        var_dtype_raw = var.get("datatype") or "VARIANT"
        instance_of_name = var.get("instance_of_name")
        var_comment = var.get("comment")
        start_value_raw = var.get("start_value")
        children = var.get("children")
        array_elements = var.get("array_elements")

        # --- Inline STRUCT: declared member by member, no start value ---
        if children:
            declaration_dtype = "STRUCT"
            if isinstance(var_dtype_raw, str):
                # Keep the array dimension of 'Array[..] of Struct'; without
                # it the variable would be declared as a single struct.
                array_match = _DECL_ARRAY_PATTERN.match(var_dtype_raw)
                if array_match:
                    element_clean = _decl_strip_quotes(array_match.group(2).strip())
                    if element_clean.lower() == "struct":
                        declaration_dtype = f"{array_match.group(1)}STRUCT"
            scl_lines.append(f"{indent}{var_name_scl} : {declaration_dtype}")
            scl_lines.extend(
                generate_scl_declarations(children, indent_level + 1, project_root_dir)
            )
            # The variable comment is attached to the closing line.
            scl_lines.append(f"{indent}END_STRUCT;" + _decl_inline_comment(var_comment))
            scl_lines.append("")  # Blank line after the struct
            continue

        # --- Everything else: resolve the declared type ---
        declaration_dtype, base_type_for_init, is_array, type_to_check = (
            _decl_resolve_type(var_dtype_raw, instance_of_name)
        )

        # Optional lookup of the UDT/FB definition file (only reported).
        if type_to_check and project_root_dir:
            _decl_report_type_definition(type_to_check, project_root_dir)

        # --- Start value ---
        init_value_scl = None
        if is_array and array_elements:
            init_value_scl = _decl_array_initializer(
                array_elements, base_type_for_init, var_name_scl
            )
        elif not is_array and start_value_raw is not None:
            init_value_scl = format_scl_start_value(start_value_raw, base_type_for_init)

        init_value_scl_part = f" := {init_value_scl}" if init_value_scl is not None else ""

        scl_lines.append(
            f"{indent}{var_name_scl} : {declaration_dtype}{init_value_scl_part};"
            + _decl_inline_comment(var_comment)
        )

    return scl_lines