# ToUpload/parsers/parse_scl.py
# -*- coding: utf-8 -*-
# SCL network parser: rebuilds SCL source text from the tokenised
# StructuredText XML and exposes it through the common parser interface.
import re

from lxml import etree

# Import from the parser utilities
from .parser_utils import ns, get_multilingual_text

# Two spaces per indentation level in the re-indented output.
_INDENT_UNIT = "  "

# Access scopes whose payload is a <Symbol> made of <Component> children.
_VARIABLE_SCOPES = frozenset(
    {
        "GlobalVariable",
        "LocalVariable",
        "TempVariable",
        "InOutVariable",
        "InputVariable",
        "OutputVariable",
        "ConstantVariable",
        "GlobalConstant",
    }
)

# Literal types that are written as "<PREFIX><value>" unless already prefixed.
_LITERAL_PREFIXES = {
    "time": "T#",
    "ltime": "LT#",
    "s5time": "S5T#",
    "date": "D#",
    "dtl": "DTL#",
    "dt": "DT#",
    "tod": "TOD#",
}

# A CASE label is recognised by a first ":" that is not the start of ":=".
_LABEL_COLON_RE = re.compile(r"^[^:]*:(?!=)")
# Whole-word branch keywords; a line containing one is not a CASE label.
_BRANCH_KEYWORD_RE = re.compile(r"\b(?:IF|ELSIF|ELSE|THEN)\b")
# "ELSIF" at the start of a line, as a whole word.
_ELSIF_RE = re.compile(r"ELSIF\b")


def _find(node, path):
    """Evaluate ``./<path>`` on *node*, namespaced (``st:``) first, then plain.

    *path* is a child step such as ``"Constant"`` or
    ``"ConstantValue/text()"``. Returns the (possibly empty) XPath result list.
    """
    found = node.xpath(f"./st:{path}", namespaces=ns)
    if not found:
        found = node.xpath(f"./{path}")
    return found


def _local_name(node):
    """Return the tag of *node* without its namespace."""
    return etree.QName(node.tag).localname


def _is_token(node, text):
    """Return True when *node* is a ``Token`` element whose Text is *text*."""
    return _local_name(node) == "Token" and node.get("Text") == text


def _enclosing_access_has_quotes(symbol):
    """Return True when the nearest enclosing Access carries HasQuotes=true.

    The lookup is done once per Symbol: every Component is a direct child of
    the Symbol, so they all share the same nearest ``Access`` ancestor.
    """
    access_parent = symbol.xpath("ancestor::st:Access[1]", namespaces=ns)
    if not access_parent:
        access_parent = symbol.xpath("ancestor::Access[1]")
    if not access_parent:
        return False
    flag_text = _find(access_parent[0], "BooleanAttribute[@Name='HasQuotes']/text()")
    return bool(flag_text) and flag_text[0].lower() == "true"


def _render_local_constant(elem, scope):
    """Render a ``LocalConstant`` access as ``#Name``."""
    constants = _find(elem, "Constant")
    if not constants:
        return f"/*_ERR_NO_SYMBOL_IN_{scope}_*/"
    const_name = constants[0].get("Name", "_ERR_CONST_NAME_")
    # Local constants are written with a leading '#'.
    return f"#{const_name}"


def _render_index_access(access, processed_elements):
    """Render one ``Access`` found between array brackets.

    Returns the index text, or ``None`` when a literal constant has no value
    (in which case nothing is emitted for it).
    """
    scope = access.get("Scope")

    if scope == "LiteralConstant":
        # Index literals are emitted verbatim (no type-based formatting).
        constants = _find(access, "Constant")
        if constants:
            values = _find(constants[0], "ConstantValue")
            if values and values[0].text:
                return values[0].text.strip()
        return None

    if scope == "LocalVariable":
        # Local variables used as index: '#first.rest.rest', never quoted.
        symbols = _find(access, "Symbol")
        if not symbols:
            return "/*_ERR_NO_SYMBOL_*/"
        names = [c.get("Name", "_ERR_COMP_") for c in _find(symbols[0], "Component")]
        rendered = "".join(
            f"#{name}" if pos == 0 else f".{name}" for pos, name in enumerate(names)
        )
        return rendered or "/*_ERR_EMPTY_VAR_*/"

    # Any other scope: render the Access itself. (Feeding the Access node to
    # reconstruct_scl_from_tokens would always yield "", because its children
    # are Symbol/Constant/CallInfo nodes rather than tokens.)
    rendered = _render_access(access, processed_elements).strip()
    return rendered or "/*_ERR_RECURSIVE_EMPTY_*/"


def _render_bracketed_index(comp_children, processed_elements):
    """Render ``[ ... ]`` from a Component's children (first/last are brackets).

    Every child's UId is recorded in *processed_elements*. Returns "" when
    nothing between the brackets produced text.
    """
    for bracket in (comp_children[0], comp_children[-1]):
        bracket_uid = bracket.get("UId")
        if bracket_uid:
            processed_elements.add(bracket_uid)

    pieces = []
    saw_token = False
    for child in comp_children[1:-1]:
        child_uid = child.get("UId")
        if child_uid:
            processed_elements.add(child_uid)

        child_tag = _local_name(child)
        if child_tag == "Access":
            piece = _render_index_access(child, processed_elements)
            if piece is not None:
                pieces.append(piece)
        elif child_tag == "Token":
            # Separators/operators written in the source (",", "+", ...).
            token_text = child.get("Text", "")
            if token_text.strip():
                pieces.append(token_text)
                saw_token = True

    if not pieces:
        return ""
    # When the source already supplies separator/operator tokens, joining with
    # "," again would duplicate them ("[1,,,2]"); only synthesise commas when
    # no token was present.
    separator = "" if saw_token else ","
    return f"[{separator.join(pieces)}]"


def _render_array_index(comp, processed_elements):
    """Return the ``[...]`` suffix for *comp*, or "" when it has no index."""
    comp_children = comp.xpath("./*")
    if (
        len(comp_children) >= 3
        and _is_token(comp_children[0], "[")
        and _is_token(comp_children[-1], "]")
    ):
        return _render_bracketed_index(comp_children, processed_elements)

    # No explicit bracket tokens: index Access nodes sit directly under the
    # Component.
    index_nodes = _find(comp, "Access")
    if not index_nodes:
        return ""
    rendered = [
        _render_access(node, processed_elements).replace("\n", "").strip()
        for node in index_nodes
    ]
    return f"[{','.join(rendered)}]"


def _render_symbol_access(elem, scope, processed_elements):
    """Render a variable access (``Symbol`` made of dotted ``Component``s)."""
    symbols = _find(elem, "Symbol")
    if not symbols:
        return f"/*_ERR_NO_SYMBOL_IN_{scope}_*/"

    symbol = symbols[0]
    access_quoted = _enclosing_access_has_quotes(symbol)

    text_parts = []
    for position, comp in enumerate(_find(symbol, "Component")):
        name = comp.get("Name", "_ERR_COMP_")
        is_first = position == 0
        if not is_first:
            text_parts.append(".")

        # HasQuotes may be set on the Component itself or on the Access.
        quoted = comp.get("HasQuotes", "false").lower() == "true" or access_quoted
        is_temp = name.startswith("#")

        if scope == "LocalVariable" and is_first and not is_temp:
            # Local variables use a '#' prefix instead of quotes.
            text_parts.append(f"#{name}")
        elif quoted or (
            is_first
            and not is_temp
            and '"' not in name  # avoid double quoting
            and scope != "LocalVariable"
        ):
            text_parts.append(f'"{name}"')
        else:
            text_parts.append(name)

        index_suffix = _render_array_index(comp, processed_elements)
        if index_suffix:
            text_parts.append(index_suffix)

    return "".join(text_parts)


def _render_typed_constant(elem):
    """Render a ``TypedConstant``; its value already carries the type (T#5s)."""
    constants = _find(elem, "Constant")
    if not constants:
        return "/*_ERR_NO_CONST_ELEM_*/"
    values = _find(constants[0], "ConstantValue")
    if values and values[0].text:
        return values[0].text.strip()
    return "/*_ERR_NO_CONST_VALUE_*/"


def _infer_literal_type(const_val):
    """Guess a literal's type from its text; "" means "emit as is"."""
    if const_val.lower() in ("true", "false"):
        return "bool"
    if const_val.startswith("'") and const_val.endswith("'"):
        return "string"
    if const_val.isdigit() or (const_val.startswith("-") and const_val[1:].isdigit()):
        return "int"
    if "." in const_val:
        return "real"
    return ""


def _render_literal_constant(elem):
    """Render a ``LiteralConstant`` formatted according to its type."""
    constants = _find(elem, "Constant")
    if not constants:
        return "/*_ERR_NOCONST_*/"

    constant = constants[0]
    value_texts = _find(constant, "ConstantValue/text()")
    type_texts = _find(constant, "ConstantType/text()")

    if value_texts and value_texts[0] is not None:
        const_val = value_texts[0].strip()
    else:
        const_val = "_ERR_CONSTVAL_"
    if type_texts and type_texts[0] is not None:
        const_type = type_texts[0].strip().lower()
    else:
        const_type = ""

    # Without an explicit type, infer one from the value.
    if not const_type:
        const_type = _infer_literal_type(const_val)

    if const_type == "bool":
        return const_val.upper()

    if const_type in ("string", "char"):
        if const_val.startswith("'") and const_val.endswith("'"):
            return const_val
        escaped = const_val.replace("'", "''")
        return f"'{escaped}'"

    if const_type in ("wstring", "wchar"):
        escaped = const_val.replace("'", "''")
        return f"{const_type.upper()}#'{escaped}'"

    prefix = _LITERAL_PREFIXES.get(const_type)
    if prefix is not None:
        return const_val if const_val.startswith(prefix) else f"{prefix}{const_val}"

    # Reals get a trailing ".0" when written without decimals or exponent.
    if (
        const_type in ("real", "lreal")
        and "." not in const_val
        and "e" not in const_val.lower()
    ):
        return f"{const_val}.0"

    # Integers, bit strings, unknown or missing type: value as is.
    return const_val


def _render_call_info(call_info):
    """Render a call described by a ``CallInfo`` node (classic structure)."""
    call_name = call_info.get("Name", "_ERR_CALLNAME_")
    call_type = call_info.get("BlockType")  # FB, FC, etc.

    param_parts = []
    for param in call_info.xpath("./st:Parameter", namespaces=ns):
        param_name = param.get("Name", "_ERR_PARAMNAME_")
        # The value lives inside the Parameter node as Access/Token children.
        value_nodes = param.xpath("./st:Access | ./st:Token", namespaces=ns)
        value_scl = ""
        if value_nodes:
            value_scl = reconstruct_scl_from_tokens(param)
            value_scl = value_scl.replace("\n", "").strip()
        param_parts.append(f"{param_name} := {value_scl}")

    args = ", ".join(param_parts)

    if call_type == "FB":
        instance_names = call_info.xpath(
            "./st:Instance/st:Component/@Name", namespaces=ns
        )
        if instance_names:
            return f'"{instance_names[0]}"({args})'
        # FB without an explicit instance (could be a STAT instance).
        return f'"{call_name}"({args}) (* FB sin instancia explícita? *)'
    if call_type == "FC":
        return f'"{call_name}"({args})'
    # Other call types.
    return f'"{call_name}"({args}) (* Tipo: {call_type} *)'


def _render_instruction(instruction):
    """Render a call described by an ``Instruction`` node (native SCL)."""
    instr_name = instruction.get("Name")  # may be None for unnamed calls

    param_parts = []
    for param in instruction.xpath("./st:Parameter", namespaces=ns):
        param_name = param.get("Name", "_ERR_PARAMNAME_")
        value_scl = reconstruct_scl_from_tokens(param).replace("\n", "").strip()
        if value_scl.startswith(":="):
            # The value already carries ":="; do not duplicate it.
            param_parts.append(f"{param_name} {value_scl}")
        elif value_scl:
            param_parts.append(f"{param_name} := {value_scl}")
        else:
            param_parts.append(f"{param_name} := /*_ERR_PARAM_VALUE_*/")

    for param in instruction.xpath("./st:NamelessParameter", namespaces=ns):
        value_scl = reconstruct_scl_from_tokens(param).replace("\n", "").strip()
        param_parts.append(value_scl if value_scl else "/*_ERR_NAMELESS_PARAM_*/")

    args = ", ".join(param_parts)
    if instr_name:
        return f'"{instr_name}"({args})'
    # Unnamed call, probably an FB instance call.
    return f"({args})"


def _render_call(elem):
    """Render an Access with Scope="Call" (CallInfo first, then Instruction)."""
    call_info_nodes = elem.xpath("./st:CallInfo", namespaces=ns)
    if call_info_nodes:
        return _render_call_info(call_info_nodes[0])
    instruction_nodes = elem.xpath("./st:Instruction", namespaces=ns)
    if instruction_nodes:
        return _render_instruction(instruction_nodes[0])
    return "/*_ERR_NO_CALLINFO_*/"


def _render_access(elem, processed_elements):
    """Render a single ``Access`` element according to its Scope attribute.

    Unknown scopes yield the placeholder ``/*_ERR_Scope_<scope>_*/``.
    """
    scope = elem.get("Scope")
    if scope == "LocalConstant":
        return _render_local_constant(elem, scope)
    if scope in _VARIABLE_SCOPES:
        return _render_symbol_access(elem, scope, processed_elements)
    if scope == "TypedConstant":
        return _render_typed_constant(elem)
    if scope == "LiteralConstant":
        return _render_literal_constant(elem)
    if scope == "Call":
        return _render_call(elem)
    # More scopes can be added here if needed (e.g. Address, Label, Reference).
    return f"/*_ERR_Scope_{scope}_*/"


def _render_comment(elem, tag):
    """Render a ``Comment`` or ``LineComment`` element as SCL comment text."""
    if tag != "LineComment":
        # Classic Comment: text comes from the multilingual structure.
        comment_text = get_multilingual_text(elem)
        return f"(* {comment_text} *)"

    # LineComment holds its text directly in a Text child.
    text_nodes = _find(elem, "Text")
    if not (text_nodes and text_nodes[0].text):
        return "// [Comentario vacío]"
    comment_text = text_nodes[0].text.strip()
    if "\n" in comment_text:
        # Multi-line text cannot be a '//' comment; use block form.
        return f"(* {comment_text} *)"
    return f"// {comment_text}"


def _is_case_label(trimmed_line, line_upper):
    """Return True when the line looks like a CASE label (``1:``, ``#Name:``).

    The first ':' must not belong to an assignment (':='), the part before it
    must not contain a '//' comment, and, unless the line starts with '#',
    it must not contain a branch keyword as a whole word.
    """
    if not _LABEL_COLON_RE.match(trimmed_line):
        return False
    if line_upper in ("ELSE:", "ELSIF:"):
        return False
    if "//" in trimmed_line.split(":")[0]:
        return False
    return trimmed_line.startswith("#") or not _BRANCH_KEYWORD_RE.search(line_upper)


def _reindent_scl(full_scl):
    """Apply a simple keyword-driven re-indentation; blank lines are dropped."""
    output_lines = []
    indent_level = 0
    case_indent_level = 0  # label level of the current CASE; 0 = not in CASE

    for line in full_scl.splitlines():
        trimmed_line = line.strip()
        if not trimmed_line:
            continue

        line_upper = trimmed_line.upper()
        is_case_label = _is_case_label(trimmed_line, line_upper)

        # Dedent BEFORE printing for closing/branch keywords.
        if line_upper.startswith(("END_", "UNTIL", "}")):
            indent_level = max(0, indent_level - 1)
            if line_upper.startswith("END_CASE"):
                case_indent_level = 0
        elif (
            line_upper == "ELSE" or _ELSIF_RE.match(line_upper)
        ) and not is_case_label:
            # "ELSIF <cond> THEN" must dedent as well, otherwise every ELSIF
            # branch leaves the rest of the code one level too deep.
            indent_level = max(0, indent_level - 1)
        elif is_case_label and case_indent_level > 0:
            indent_level = max(0, case_indent_level)

        # CASE labels sit one level above the statements they guard.
        current_indent = indent_level
        if is_case_label and case_indent_level > 0:
            current_indent = case_indent_level
        output_lines.append(_INDENT_UNIT * current_indent + trimmed_line)

        # Indent AFTER printing for opening keywords.
        if line_upper.endswith(("THEN", "DO", "{")) or line_upper == "ELSE":
            indent_level += 1
        elif line_upper.startswith(("IF ", "FOR ", "WHILE ", "REPEAT", "STRUCT")):
            indent_level += 1
        elif line_upper.startswith("CASE ") and line_upper.endswith(" OF"):
            case_indent_level = indent_level + 1
            indent_level += 1
        elif is_case_label and case_indent_level > 0:
            # Statements after a label go one level deeper than the label.
            indent_level = case_indent_level + 1

    return "\n".join(output_lines)


def reconstruct_scl_from_tokens(st_node):
    """Rebuild SCL source text from a tokenised StructuredText-like node.

    The direct children of *st_node* (Token, Blank, NewLine, Access, Comment,
    LineComment) are converted to text in document order; other children are
    ignored. Children are looked up with the ``st:`` namespace first and
    without a namespace as fallback. The result is then re-indented.

    Args:
        st_node: lxml element whose children are the SCL tokens, or None.

    Returns:
        The reconstructed SCL as a string. When *st_node* is None an SCL
        comment describing the error is returned instead.
    """
    if st_node is None:
        return "// Error: StructuredText node not found.\n"

    children = st_node.xpath("./st:*", namespaces=ns)
    if not children:
        children = st_node.xpath("./*")

    scl_parts = []
    # UIds of elements already consumed while rendering an array index.
    processed_elements = set()

    for elem in children:
        elem_id = elem.get("UId")
        if elem_id and elem_id in processed_elements:
            continue

        tag = _local_name(elem)

        if tag == "Token":
            scl_parts.append(elem.get("Text", ""))
        elif tag == "Blank":
            # Avoid doubling a space that the previous part already ends with.
            num_spaces = int(elem.get("Num", 1))
            if not scl_parts or not scl_parts[-1].endswith(" "):
                scl_parts.append(" " * num_spaces)
            elif num_spaces > 1:
                scl_parts.append(" " * (num_spaces - 1))
        elif tag == "NewLine":
            # Drop trailing spaces before the line break.
            if scl_parts:
                scl_parts[-1] = scl_parts[-1].rstrip()
            scl_parts.append("\n")
        elif tag == "Access":
            scl_parts.append(_render_access(elem, processed_elements))
        elif tag == "Comment" or tag == "LineComment":
            scl_parts.append(_render_comment(elem, tag))
        # Any other node type is not relevant for the SCL text.

    return _reindent_scl("".join(scl_parts))


def parse_scl_network(network_element):
    """Parse an SCL network and return its dictionary representation.

    Args:
        network_element: lxml element of the network (compile unit).

    Returns:
        A dict with the keys ``id``, ``title``, ``comment``, ``language`` and
        ``logic``; ``logic`` holds a single ``RAW_SCL_CHUNK`` entry with the
        reconstructed SCL source.
    """
    network_id = network_element.get("ID", "UnknownSCL_ID")
    network_lang = "SCL"  # this parser only handles SCL

    # Title and comment, for consistency with the other parsers.
    title_elem = network_element.xpath(
        "./ObjectList/MultilingualText[@CompositionName='Title']", namespaces=ns
    )
    if title_elem:
        network_title = get_multilingual_text(title_elem[0])
    else:
        network_title = f"Network {network_id}"

    comment_elem = network_element.xpath(
        "./ObjectList/MultilingualText[@CompositionName='Comment']", namespaces=ns
    )
    network_comment = get_multilingual_text(comment_elem[0]) if comment_elem else ""

    # Locate NetworkSource / StructuredText regardless of namespace.
    structured_text_node = None
    network_source_node = network_element.xpath(".//*[local-name()='NetworkSource']")
    if network_source_node:
        st_nodes = network_source_node[0].xpath(
            ".//*[local-name()='StructuredText']"
        )
        if st_nodes:
            structured_text_node = st_nodes[0]

    if structured_text_node is not None:
        reconstructed_scl = reconstruct_scl_from_tokens(structured_text_node)
    else:
        reconstructed_scl = (
            "// SCL extraction failed: StructuredText node not found.\n"
        )

    return {
        "id": network_id,
        "title": network_title,
        "comment": network_comment,
        "language": network_lang,
        "logic": [
            {
                "instruction_uid": f"SCL_{network_id}",
                "type": "RAW_SCL_CHUNK",
                "scl": reconstructed_scl,
            }
        ],
    }


# --- Parser information ---
def get_parser_info():
    """Return the registration info for this parser.

    Returns:
        A dict with ``language`` (list of supported languages) and
        ``parser_func`` (the function to call for each network).
    """
    return {
        "language": ["SCL"],  # supported languages
        "parser_func": parse_scl_network,  # function to call
    }