# ParamManagerScripts/backend/script_groups/XML Parser to SCL/parsers/parse_scl.py
#
# 744 lines
# 36 KiB
# Python

# ToUpload/parsers/parse_scl.py
# -*- coding: utf-8 -*-
from lxml import etree
import re
# Importar desde las utilidades del parser
from .parser_utils import ns, get_multilingual_text
# Access scopes whose payload is a <Symbol> made of <Component> children.
_SCL_VARIABLE_SCOPES = frozenset(
    (
        "GlobalVariable",
        "LocalVariable",
        "TempVariable",
        "InOutVariable",
        "InputVariable",
        "OutputVariable",
        "ConstantVariable",
        "GlobalConstant",
    )
)

# Literal types written as "<PREFIX><value>" unless the value already starts
# with that prefix.
_SCL_LITERAL_PREFIXES = {
    "time": "T#",
    "ltime": "LT#",
    "s5time": "S5T#",
    "date": "D#",
    "dtl": "DTL#",
    "dt": "DT#",
    "tod": "TOD#",
}

# One indentation step of the re-indented output (two spaces).
_SCL_INDENT = "  "

# Matches an upper-cased line whose first word is ELSIF.
_SCL_ELSIF_RE = re.compile(r"ELSIF\b")

# Whole-word branch keywords that disqualify a line from being a CASE label.
_SCL_BRANCH_KEYWORD_RE = re.compile(r"\b(?:IF|ELSIF|ELSE|THEN)\b")


def _scl_xpath(node, path):
    """Evaluate *path* on *node*, retrying without the ``st:`` prefix if empty.

    The first attempt binds prefixes through the module-level ``ns`` mapping;
    the retry removes every ``st:`` from the path so that un-namespaced
    documents are matched as well.
    """
    matches = node.xpath(path, namespaces=ns)
    if not matches:
        matches = node.xpath(path.replace("st:", ""))
    return matches


def _scl_flatten(text):
    """Collapse reconstructed SCL onto one line and trim surrounding blanks."""
    return text.replace("\n", "").strip()


def _scl_is_token(node, text):
    """Return True when *node* is a ``Token`` element whose Text equals *text*."""
    return etree.QName(node.tag).localname == "Token" and node.get("Text") == text


def _scl_component_is_quoted(component):
    """Return True when *component* or its nearest Access ancestor sets HasQuotes."""
    if component.get("HasQuotes", "false").lower() == "true":
        return True
    owner = _scl_xpath(component, "ancestor::st:Access[1]")
    if not owner:
        return False
    flag = _scl_xpath(owner[0], "./st:BooleanAttribute[@Name='HasQuotes']/text()")
    return bool(flag) and flag[0].lower() == "true"


def _scl_render_index_suffix(component):
    """Return the ``[...]`` array-index text of *component*, or ``""`` if none.

    Two layouts are handled: children wrapped in explicit ``[`` / ``]`` tokens,
    and bare ``Access`` children directly under the component. Every index
    ``Access`` is rendered with ``_scl_render_access`` (the node itself, not
    its children). A ``,`` token starts a new index; any other non-blank token
    (for example an operator) stays inside the current index expression.
    """
    children = component.xpath("./*")
    if (
        len(children) >= 3
        and _scl_is_token(children[0], "[")
        and _scl_is_token(children[-1], "]")
    ):
        inner = children[1:-1]
    else:
        inner = _scl_xpath(component, "./st:Access")

    indices = []  # finished index expressions
    current = []  # pieces of the index expression being assembled
    after_operand = False  # True when the previous piece was an Access
    for node in inner:
        kind = etree.QName(node.tag).localname
        if kind == "Access":
            # Two operands with no token in between are separate indices.
            if after_operand and current:
                indices.append("".join(current))
                current = []
            current.append(_scl_render_access(node))
            after_operand = True
        elif kind == "Token":
            text = node.get("Text", "").strip()
            if text == ",":
                # The separator is re-inserted by the join below, so it must
                # not be stored as an index of its own.
                if current:
                    indices.append("".join(current))
                    current = []
                after_operand = False
            elif text:
                current.append(text)
                after_operand = False
    if current:
        indices.append("".join(current))
    return f"[{','.join(indices)}]" if indices else ""


def _scl_render_symbol(access_node, scope):
    """Render the dotted symbol path of a variable-like Access node.

    The first component is prefixed with ``#`` for ``LocalVariable`` scope and
    wrapped in double quotes for the other scopes, unless its name already
    starts with ``#`` or contains a quote. Any component flagged HasQuotes is
    quoted. Array indices are appended after each component.
    """
    symbol_nodes = _scl_xpath(access_node, "./st:Symbol")
    if not symbol_nodes:
        return f"/*_ERR_NO_SYMBOL_IN_{scope}_*/"

    pieces = []
    components = _scl_xpath(symbol_nodes[0], "./st:Component")
    for position, component in enumerate(components):
        name = component.get("Name", "_ERR_COMP_")
        if position > 0:
            pieces.append(".")
        first_plain = position == 0 and not name.startswith("#")
        if scope == "LocalVariable" and first_plain:
            pieces.append(f"#{name}")
        elif _scl_component_is_quoted(component) or (
            first_plain and '"' not in name  # avoid double quoting
        ):
            pieces.append(f'"{name}"')
        else:
            pieces.append(name)
        pieces.append(_scl_render_index_suffix(component))
    return "".join(pieces)


def _scl_format_literal(const_val, const_type):
    """Format a literal value as SCL text for the given lower-cased type name.

    An empty *const_type* is inferred from the value (bool, string, int or
    real); values of unknown type are returned unchanged.
    """
    if not const_type:
        if const_val.lower() in ("true", "false"):
            const_type = "bool"
        elif const_val.startswith("'") and const_val.endswith("'"):
            const_type = "string"
        elif const_val.isdigit() or (
            const_val.startswith("-") and const_val[1:].isdigit()
        ):
            const_type = "int"
        elif "." in const_val:
            const_type = "real"

    if const_type == "bool":
        return const_val.upper()
    if const_type in ("string", "char"):
        if const_val.startswith("'") and const_val.endswith("'"):
            return const_val
        escaped = const_val.replace("'", "''")
        return f"'{escaped}'"
    if const_type in ("wstring", "wchar"):
        escaped = const_val.replace("'", "''")
        return f"{const_type.upper()}#'{escaped}'"
    prefix = _SCL_LITERAL_PREFIXES.get(const_type)
    if prefix is not None:
        return const_val if const_val.startswith(prefix) else f"{prefix}{const_val}"
    if (
        const_type in ("real", "lreal")
        and "." not in const_val
        and "e" not in const_val.lower()
    ):
        # Reals without a fractional part or exponent get an explicit ".0".
        return f"{const_val}.0"
    return const_val


def _scl_render_call(access_node):
    """Render an Access node with ``Scope="Call"`` as an SCL call expression.

    A ``CallInfo`` child is tried first, then an ``Instruction`` child.
    Parameter values are reconstructed recursively from the parameter's own
    children and flattened onto one line.
    """
    call_info = _scl_xpath(access_node, "./st:CallInfo")
    if call_info:
        info = call_info[0]
        call_name = info.get("Name", "_ERR_CALLNAME_")
        call_type = info.get("BlockType")  # FB, FC, ...
        args = []
        for param in _scl_xpath(info, "./st:Parameter"):
            param_name = param.get("Name", "_ERR_PARAMNAME_")
            value = ""
            if _scl_xpath(param, "./st:Access | ./st:Token"):
                value = _scl_flatten(reconstruct_scl_from_tokens(param))
            args.append(f"{param_name} := {value}")
        arg_list = ", ".join(args)
        if call_type == "FB":
            instance = _scl_xpath(info, "./st:Instance/st:Component/@Name")
            if instance:
                return f'"{instance[0]}"({arg_list})'
            return f'"{call_name}"({arg_list}) (* FB sin instancia explícita? *)'
        if call_type == "FC":
            return f'"{call_name}"({arg_list})'
        return f'"{call_name}"({arg_list}) (* Tipo: {call_type} *)'

    instruction = _scl_xpath(access_node, "./st:Instruction")
    if not instruction:
        return "/*_ERR_NO_CALLINFO_*/"

    instr = instruction[0]
    instr_name = instr.get("Name")  # may be absent
    args = []
    for param in _scl_xpath(instr, "./st:Parameter"):
        param_name = param.get("Name", "_ERR_PARAMNAME_")
        value = _scl_flatten(reconstruct_scl_from_tokens(param))
        if value.startswith(":="):
            # The reconstructed value already carries the assignment operator.
            args.append(f"{param_name} {value}")
        elif value:
            args.append(f"{param_name} := {value}")
        else:
            args.append(f"{param_name} := /*_ERR_PARAM_VALUE_*/")
    for param in _scl_xpath(instr, "./st:NamelessParameter"):
        value = _scl_flatten(reconstruct_scl_from_tokens(param))
        args.append(value if value else "/*_ERR_NAMELESS_PARAM_*/")
    arg_list = ", ".join(args)
    if instr_name:
        return f'"{instr_name}"({arg_list})'
    return f"({arg_list})"


def _scl_render_access(access_node):
    """Render one ``Access`` element as SCL text according to its Scope.

    Unsupported scopes and missing children yield ``/*_ERR_..._*/`` markers in
    the output instead of raising.
    """
    scope = access_node.get("Scope")

    if scope == "LocalConstant":
        constant = _scl_xpath(access_node, "./st:Constant")
        if constant:
            return f"#{constant[0].get('Name', '_ERR_CONST_NAME_')}"
        return f"/*_ERR_NO_SYMBOL_IN_{scope}_*/"

    if scope in _SCL_VARIABLE_SCOPES:
        return _scl_render_symbol(access_node, scope)

    if scope == "TypedConstant":
        constant = _scl_xpath(access_node, "./st:Constant")
        if not constant:
            return "/*_ERR_NO_CONST_ELEM_*/"
        value_nodes = _scl_xpath(constant[0], "./st:ConstantValue")
        if value_nodes and value_nodes[0].text:
            # The value is used verbatim (e.g. T#5s).
            return value_nodes[0].text.strip()
        return "/*_ERR_NO_CONST_VALUE_*/"

    if scope == "LiteralConstant":
        constant = _scl_xpath(access_node, "./st:Constant")
        if not constant:
            return "/*_ERR_NOCONST_*/"
        value_text = _scl_xpath(constant[0], "./st:ConstantValue/text()")
        type_text = _scl_xpath(constant[0], "./st:ConstantType/text()")
        const_val = value_text[0].strip() if value_text else "_ERR_CONSTVAL_"
        const_type = type_text[0].strip().lower() if type_text else ""
        return _scl_format_literal(const_val, const_type)

    if scope == "Call":
        return _scl_render_call(access_node)

    return f"/*_ERR_Scope_{scope}_*/"


def _scl_render_comment(node, tag):
    """Render a ``Comment`` or ``LineComment`` element as SCL comment text."""
    if tag == "LineComment":
        # LineComment carries a direct <Text> child, not multilingual text.
        text_nodes = _scl_xpath(node, "./st:Text")
        if text_nodes and text_nodes[0].text:
            text = text_nodes[0].text.strip()
            if "\n" in text:
                return f"(* {text} *)"
            return f"// {text}"
        return "// [Comentario vacío]"
    return f"(* {get_multilingual_text(node)} *)"


def _scl_is_case_label(line, upper):
    """Return True when *line* looks like a CASE label such as ``1:``.

    The first colon must not start a ``:=`` assignment and must not sit behind
    a comment opener. Lines containing a whole-word IF/ELSIF/ELSE/THEN are
    rejected unless they start with ``#``.
    """
    head, colon, tail = line.partition(":")
    if not colon or tail.startswith("="):
        return False
    if "//" in head or "(*" in head:
        return False
    return line.startswith("#") or not _SCL_BRANCH_KEYWORD_RE.search(upper)


def _scl_reindent(scl_text):
    """Re-indent *scl_text* line by line using simple keyword rules.

    Blank lines are dropped. ``END_*``, ``UNTIL`` and ``}`` dedent before the
    line is written; ``ELSE`` / ``ELSIF`` are written one level out and their
    body one level in; CASE labels are written one level inside their ``CASE``
    and their statements one level deeper. Open CASE statements are tracked on
    a stack so that ``END_CASE`` returns to the level of the matching ``CASE``.
    """
    output_lines = []
    level = 0
    case_levels = []  # label indent level of every currently open CASE

    for raw_line in scl_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        upper = line.upper()
        is_elsif = _SCL_ELSIF_RE.match(upper) is not None
        is_label = bool(case_levels) and _scl_is_case_label(line, upper)

        # Adjust the level BEFORE writing the line.
        if upper.startswith("END_CASE") and case_levels:
            level = max(0, case_levels.pop() - 1)
        elif upper.startswith(("END_", "UNTIL", "}")):
            level = max(0, level - 1)
        elif upper == "ELSE" or is_elsif:
            level = max(0, level - 1)
        elif is_label:
            level = case_levels[-1]

        output_lines.append(_SCL_INDENT * level + line)

        # Adjust the level AFTER writing the line.
        if upper.endswith(("THEN", "DO", "{")) or upper == "ELSE":
            level += 1
        elif is_elsif or upper.startswith(
            ("IF ", "FOR ", "WHILE ", "REPEAT", "STRUCT")
        ):
            # Opening line that does not end in THEN/DO (for instance because
            # of a trailing comment): its body is still one level deeper.
            level += 1
        elif upper.startswith("CASE ") and upper.endswith(" OF"):
            case_levels.append(level + 1)
            level += 1
        elif is_label:
            level = case_levels[-1] + 1

    return "\n".join(output_lines)


def reconstruct_scl_from_tokens(st_node):
    """Rebuild SCL source text from the children of a ``<StructuredText>`` node.

    The direct children of *st_node* are processed in document order:
    ``Token`` text is copied, ``Blank`` and ``NewLine`` produce whitespace,
    ``Access`` elements are rendered according to their ``Scope`` (variables,
    constants, calls) and ``Comment`` / ``LineComment`` become SCL comments.
    Other element types are ignored. The joined text is then re-indented.

    The function is also called recursively on parameter elements, whose
    children use the same element vocabulary.

    Args:
        st_node: lxml element whose children are the tokens to render, or
            ``None``.

    Returns:
        The reconstructed SCL as a string. When *st_node* is ``None`` a
        one-line ``// Error: ...`` comment is returned instead. Problems
        inside the tree are reported inline as ``/*_ERR_..._*/`` markers.
    """
    if st_node is None:
        return "// Error: StructuredText node not found.\n"

    scl_parts = []
    for elem in _scl_xpath(st_node, "./st:*"):
        tag = etree.QName(elem.tag).localname
        if tag == "Token":
            scl_parts.append(elem.get("Text", ""))
        elif tag == "Blank":
            num_spaces = int(elem.get("Num", 1))
            if not scl_parts or not scl_parts[-1].endswith(" "):
                scl_parts.append(" " * num_spaces)
            elif num_spaces > 1:
                # The previous part already ends in a space; add the rest.
                scl_parts.append(" " * (num_spaces - 1))
        elif tag == "NewLine":
            if scl_parts:
                # Drop trailing blanks before the line break.
                scl_parts[-1] = scl_parts[-1].rstrip()
            scl_parts.append("\n")
        elif tag == "Access":
            scl_parts.append(_scl_render_access(elem))
        elif tag in ("Comment", "LineComment"):
            scl_parts.append(_scl_render_comment(elem, tag))

    return _scl_reindent("".join(scl_parts))
def parse_scl_network(network_element):
    """Build the JSON-ready dictionary for one SCL network.

    The title and comment are read from the ``MultilingualText`` children of
    ``ObjectList``; the code is reconstructed from the first
    ``StructuredText`` found under the first ``NetworkSource`` (both located
    by local name, so the namespace does not matter).

    Args:
        network_element: lxml element of the network.

    Returns:
        A dict with the keys ``id``, ``title``, ``comment``, ``language`` and
        ``logic``; ``logic`` holds a single ``RAW_SCL_CHUNK`` entry carrying
        the reconstructed SCL, or a failure comment when no
        ``StructuredText`` node exists.
    """
    network_id = network_element.get("ID", "UnknownSCL_ID")

    def _multilingual(composition_name, fallback):
        # Text of the first MultilingualText with this CompositionName.
        hits = network_element.xpath(
            f"./ObjectList/MultilingualText[@CompositionName='{composition_name}']",
            namespaces=ns,
        )
        if hits:
            return get_multilingual_text(hits[0])
        return fallback

    title = _multilingual("Title", f"Network {network_id}")
    comment = _multilingual("Comment", "")

    # Default used when no StructuredText node can be located.
    scl_code = "// SCL extraction failed: StructuredText node not found.\n"
    sources = network_element.xpath(".//*[local-name()='NetworkSource']")
    if sources:
        st_candidates = sources[0].xpath(".//*[local-name()='StructuredText']")
        if st_candidates:
            scl_code = reconstruct_scl_from_tokens(st_candidates[0])

    chunk = {
        "instruction_uid": f"SCL_{network_id}",
        "type": "RAW_SCL_CHUNK",
        "scl": scl_code,
    }
    return {
        "id": network_id,
        "title": title,
        "comment": comment,
        "language": "SCL",
        "logic": [chunk],
    }
# --- Parser information function ---
def get_parser_info():
    """Return this parser's metadata: supported languages and entry point."""
    info = {}
    info["language"] = ["SCL"]  # list of supported languages
    info["parser_func"] = parse_scl_network  # function to call
    return info