# ParamManagerScripts/backend/script_groups/S7_DB_Utils/x2.py
# (listing metadata from the paste: 335 lines, 18 KiB, Python)
import re
import os
import sys # Not strictly needed by this version but often kept from original
import glob
import pandas as pd # For Excel writing
# --- Functions for script operation ---
def find_working_directory_from_x1():
    """Locate the working directory for the script.

    Currently this is simply the process's current directory; adapt here
    if a specific configuration lookup is ever required.
    """
    cwd = os.getcwd()
    print("Info: `find_working_directory_from_x1` is using the current directory.")
    return cwd
def extract_sections(content):
    """Split S7 AWL/DB content into UDT, declaration and initialization parts.

    Returns a 5-tuple ``(udt_definitions, header, declarations,
    initializations, footer)``; ``header`` and ``footer`` are currently
    always empty strings (placeholders kept for interface stability).
    All keyword matching is case-insensitive and spans newlines ((?is)).
    """
    text = content.replace('\r\n', '\n')  # normalize line endings

    # Collect every TYPE ... END_TYPE block (UDT definitions).
    udt_blocks = list(re.finditer(r'(?is)(TYPE\s+.*?\s+END_TYPE\s*\n?)', text))
    udt_text = "".join(m.group(0) for m in udt_blocks)
    # Everything after the last UDT definition (or the whole text if none).
    remainder = text[udt_blocks[-1].end():] if udt_blocks else text

    # Narrow down to the text starting at the main DB's STRUCT keyword.
    struct_region = remainder
    header_m = re.search(r'(?is)^(.*?(?:DATA_BLOCK.*?VERSION.*?\n))(.*?STRUCT)', remainder)
    if header_m:
        struct_region = remainder[header_m.start(2):]
    else:
        # Fallback: first STRUCT anywhere, if the DATA_BLOCK/VERSION header
        # pattern was not present.
        fallback_m = re.search(r'(?is)(.*?)(STRUCT)', remainder)
        if fallback_m:
            struct_region = remainder[fallback_m.start(2):]
        else:
            # With no STRUCT at all the declaration section comes out empty.
            print(f"Warning: No 'STRUCT' keyword found for main DB declarations in a content block.")

    # Declarations: STRUCT .. BEGIN;  initializations: BEGIN .. END_DATA_BLOCK.
    decl_m = re.search(r'(?is)STRUCT\s*(.*?)BEGIN', struct_region)
    init_m = re.search(r'(?is)BEGIN\s*(.*?)END_DATA_BLOCK', struct_region)
    return (
        udt_text,
        "",  # header text placeholder (not used by the comparison logic)
        decl_m.group(1).strip() if decl_m else "",
        init_m.group(1).strip() if init_m else "",
        "",  # footer after END_DATA_BLOCK is not used
    )
def find_comparison_files_detailed(working_dir, data_suffix="_data", format_suffix="_format", updated_suffix_part="_updated"):
    """Find the _data, _format and _updated S7 export files in *working_dir*.

    Candidates are files with a .db, .awl or .txt extension; suffix matching
    is case-insensitive on the basename. Returns a tuple
    ``(data_path, format_path, updated_path)``, each entry ``None`` when the
    corresponding file was not found.
    """
    all_files_in_dir = []
    for ext_pattern in ("*.db", "*.awl", "*.txt"):  # common S7 export extensions
        all_files_in_dir.extend(glob.glob(os.path.join(working_dir, ext_pattern)))
    # Normalize paths for consistent comparisons and ensure uniqueness.
    all_files_in_dir = sorted(set(os.path.normpath(f) for f in all_files_in_dir))
    found_paths = {'data': None, 'format': None, 'updated': None}

    def select_best_file(file_list):
        # Prioritize: .db, then .awl, then .txt.
        if not file_list:
            return None
        file_list.sort(key=lambda x: ('.db' not in x.lower(), '.awl' not in x.lower(), '.txt' not in x.lower()))
        return file_list[0]

    # _data file: contains data_suffix, does not contain updated_suffix_part.
    data_candidates = [f for f in all_files_in_dir
                       if data_suffix in os.path.basename(f).lower()
                       and updated_suffix_part not in os.path.basename(f).lower()]
    found_paths['data'] = select_best_file(data_candidates)

    # _format file: contains format_suffix, does not contain updated_suffix_part.
    format_candidates = [f for f in all_files_in_dir
                         if format_suffix in os.path.basename(f).lower()
                         and updated_suffix_part not in os.path.basename(f).lower()]
    if found_paths['data'] and format_candidates:  # ensure it's not the same as the _data file
        format_candidates = [f for f in format_candidates if f != found_paths['data']]
    found_paths['format'] = select_best_file(format_candidates)

    # _updated file, strategy 1: derive the name from the _format file
    # (most reliable when a _format file was found).
    if found_paths['format']:
        format_basename = os.path.basename(found_paths['format'])
        name_part, first_ext = os.path.splitext(format_basename)
        # Handle double extensions like ".db.txt" or ".awl.txt".
        if first_ext.lower() == ".txt" and ('.db' in name_part.lower() or '.awl' in name_part.lower()):
            base_name_for_main_ext, second_ext = os.path.splitext(name_part)
            updated_basename_candidate = base_name_for_main_ext + updated_suffix_part + second_ext + first_ext
        else:  # single extension
            updated_basename_candidate = name_part + updated_suffix_part + first_ext
        # BUGFIX: normpath the candidate so the membership test against the
        # normalized all_files_in_dir list cannot spuriously fail on
        # separator/`.`-component differences.
        potential_updated_path = os.path.normpath(os.path.join(working_dir, updated_basename_candidate))
        if os.path.exists(potential_updated_path) and potential_updated_path in all_files_in_dir:
            found_paths['updated'] = potential_updated_path

    # Strategy 2: broad search for any file containing updated_suffix_part.
    if not found_paths['updated']:
        updated_candidates = [f for f in all_files_in_dir if updated_suffix_part in os.path.basename(f).lower()]
        if found_paths['format'] and updated_candidates:
            # Prefer updated files related to the format file's base name.
            format_base = os.path.basename(found_paths['format']).split(format_suffix)[0]
            updated_candidates = [f for f in updated_candidates if format_base in os.path.basename(f)]
        # Exclude the already-identified data and format files.
        if found_paths['data'] and updated_candidates:
            updated_candidates = [f for f in updated_candidates if f != found_paths['data']]
        if found_paths['format'] and updated_candidates:
            updated_candidates = [f for f in updated_candidates if f != found_paths['format']]
        found_paths['updated'] = select_best_file(updated_candidates)

    print("Identified files for comparison:")
    for key, val in found_paths.items():
        print(f" {key.capitalize()} file: {os.path.basename(val) if val else 'Not found'}")
    return found_paths['data'], found_paths['format'], found_paths['updated']
def get_variables_from_section_content(section_str, section_type="declaration"):
    """Parse one section of an S7 DB dump into a list of variable dicts.

    For ``section_type="declaration"`` a line must look like
    ``Name : Type [:= Value] ;``; for ``"initialization"`` it must look like
    ``Name.Or.Path := Value ;``.  Comment lines and purely structural
    keywords (STRUCT, TYPE, END_STRUCT, BEGIN, END_DATA_BLOCK) are skipped.
    Each returned dict carries: index, name, type, value, original_line.
    """
    parsed = []
    counter = 0
    for raw_line in section_str.replace('\r\n', '\n').split('\n'):
        stripped = raw_line.strip()
        if not stripped or stripped.startswith('//'):
            continue  # blank or comment line
        upper = stripped.upper()
        # Skip structural-only lines, unless the same line also carries a
        # full declaration/assignment.
        structural = (upper == 'STRUCT' or upper.startswith('TYPE ')
                      or upper == 'END_STRUCT' or upper == 'BEGIN'
                      or upper == 'END_DATA_BLOCK')
        if structural and not ((':' in stripped and ';' in stripped)
                               or (':=' in stripped and ';' in stripped)):
            continue

        name, dtype, val = None, None, None
        if section_type == "declaration":
            # Expect: VarName : VarType [:= InitialValue] ;
            if ':' not in stripped or ';' not in stripped:
                continue
            # Name: part before ':' (simple or "quoted" identifiers).
            m_name = re.match(r'^\s*(\"(?:\\\"|[^\"])*\"|[a-zA-Z_][\w]*)', stripped, re.IGNORECASE)
            if m_name:
                name = m_name.group(1).strip().replace('"', "")
            # Type: between ':' and a potential ':=' or ';' ("UDT", simple, ARRAY).
            m_type = re.search(r':\s*(\"[^\"]+\"|[^:=;]+)', stripped, re.IGNORECASE)
            if m_type:
                dtype = m_type.group(1).strip().replace('"', "")
            # Optional initial value between ':=' and ';'.
            m_val = re.search(r':=\s*([^;]+)', stripped, re.IGNORECASE)
            if m_val:
                val = m_val.group(1).strip()
            if not name or not dtype:
                continue  # a declaration needs both a name and a type
        elif section_type == "initialization":
            # Expect: VarNameOrPath := Value ;
            if ':=' not in stripped or ';' not in stripped:
                continue
            # Name/path before ':=' ("Quoted.Path", Simple.Path, Array[1].Path).
            m_name = re.match(r'^\s*(\"(?:\\\"|[^\"])*\"|[a-zA-Z_][\w"\[\],\.]*(?:\[.*?\]|\.[a-zA-Z_][\w"\[\],\.]*)*)\s*:=', stripped, re.IGNORECASE)
            if m_name:
                name = m_name.group(1).strip().replace('"', "")
            # Value between ':=' and ';'.
            m_val = re.search(r':=\s*([^;]+)', stripped, re.IGNORECASE)
            if m_val:
                val = m_val.group(1).strip()
            if not name or val is None:
                continue  # an assignment needs a name and a value

        if name is not None:
            parsed.append({
                "index": counter, "name": name, "type": dtype, "value": val,
                "original_line": raw_line,
            })
            counter += 1
    return parsed
def process_file_for_vars(file_path):
    """Read *file_path* and return ``(declaration_vars, initialization_vars)``.

    Declaration vars come from the main DATA_BLOCK's STRUCT section
    (initial values); initialization vars from the BEGIN...END_DATA_BLOCK
    section (current values).  UDT definitions are extracted but not
    returned.  Two empty lists are returned when the file is missing or
    unreadable.
    """
    if not file_path or not os.path.exists(file_path):
        return [], []
    try:
        # utf-8-sig transparently strips a BOM when present.
        with open(file_path, 'r', encoding='utf-8-sig') as handle:
            content = handle.read()
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return [], []
    # UDT definitions/header/footer are not needed for the comparison lists.
    _udts, _header, decl_text, init_text, _footer = extract_sections(content)
    decl_vars = get_variables_from_section_content(decl_text, "declaration")
    init_vars = get_variables_from_section_content(init_text, "initialization")
    return decl_vars, init_vars
def _positional_rows(vars_d_list, vars_f_list, vars_u_list, with_type):
    """Build positional comparison rows from three parallel variable lists.

    Row i combines entry i of each list (missing entries become blanks).
    When *with_type* is True the "Data Type" column is taken from _format,
    then _updated, then _data; otherwise it is fixed to "N/A" (types are
    not re-declared on initialization lines).
    """
    placeholder = {"name": "", "type": "", "value": "", "original_line": ""}
    lengths = [len(lst) for lst in (vars_d_list, vars_f_list, vars_u_list) if lst is not None]
    max_len = max(lengths) if lengths else 0
    rows = []
    for i in range(max_len):
        var_d = vars_d_list[i] if vars_d_list and i < len(vars_d_list) else placeholder
        var_f = vars_f_list[i] if vars_f_list and i < len(vars_f_list) else placeholder
        var_u = vars_u_list[i] if vars_u_list and i < len(vars_u_list) else placeholder
        name_d = var_d['name'] if var_d['name'] else ""
        name_f = var_f['name'] if var_f['name'] else ""
        combined = f"{name_d} / {name_f}".strip(" /")
        if not combined:
            combined = var_u['name'] or name_d or name_f  # fallback
        if with_type:
            dtype = var_f['type'] or var_u['type'] or var_d['type'] or "N/A"
        else:
            dtype = "N/A"
        rows.append({
            "Variable Name (_data / _format)": combined,
            "Data Type": dtype,
            "Value (_data)": str(var_d['value']) if var_d['value'] is not None else "",
            "Value (_format)": str(var_f['value']) if var_f['value'] is not None else "",
            "Value (_updated)": str(var_u['value']) if var_u['value'] is not None else "",
        })
    return rows


def _ordered_frame(rows, column_order):
    """Return *rows* as a DataFrame with columns in *column_order*, adding any missing columns as empty."""
    df = pd.DataFrame(rows)
    if not df.empty:
        for col in column_order:
            if col not in df.columns:
                df[col] = ""
        df = df[column_order]
    return df


def generate_excel_comparison(data_file, format_file, updated_file, output_excel_path):
    """Write an Excel workbook comparing variables from the three source files.

    Sheet 1 ('Declarations (Initial Values)') compares the STRUCT sections;
    sheet 2 ('Initializations (Current Values)') compares the BEGIN blocks.
    Variables are matched positionally (row i of each file), not by name.
    Any missing input file simply contributes empty columns.
    """
    print(f"\nProcessing _data file: {os.path.basename(data_file) if data_file else 'N/A'}")
    data_decl_vars, data_init_vars = process_file_for_vars(data_file)
    print(f" Found {len(data_decl_vars)} declaration vars, {len(data_init_vars)} initialization vars in _data file.")
    print(f"Processing _format file: {os.path.basename(format_file) if format_file else 'N/A'}")
    format_decl_vars, format_init_vars = process_file_for_vars(format_file)
    print(f" Found {len(format_decl_vars)} declaration vars, {len(format_init_vars)} initialization vars in _format file.")
    print(f"Processing _updated file: {os.path.basename(updated_file) if updated_file else 'N/A'}")
    updated_decl_vars, updated_init_vars = process_file_for_vars(updated_file)
    print(f" Found {len(updated_decl_vars)} declaration vars, {len(updated_init_vars)} initialization vars in _updated file.")

    # One column order, shared by both sheets.
    column_order = ["Variable Name (_data / _format)", "Data Type", "Value (_data)", "Value (_format)", "Value (_updated)"]

    decl_rows = _positional_rows(data_decl_vars, format_decl_vars, updated_decl_vars, with_type=True)
    print(f"\nComparing {len(decl_rows)} positional declaration entries (STRUCT section)...")
    df_declarations = _ordered_frame(decl_rows, column_order)

    init_rows = _positional_rows(data_init_vars, format_init_vars, updated_init_vars, with_type=False)
    print(f"Comparing {len(init_rows)} positional initialization entries (BEGIN block)...")
    df_initializations = _ordered_frame(init_rows, column_order)

    try:
        with pd.ExcelWriter(output_excel_path, engine='openpyxl') as writer:
            if not df_declarations.empty:
                df_declarations.to_excel(writer, sheet_name='Declarations (Initial Values)', index=False)
                print(f"Written 'Declarations (Initial Values)' sheet with {len(df_declarations)} rows.")
            else:
                print("No data for 'Declarations (Initial Values)' sheet.")
            if not df_initializations.empty:
                df_initializations.to_excel(writer, sheet_name='Initializations (Current Values)', index=False)
                print(f"Written 'Initializations (Current Values)' sheet with {len(df_initializations)} rows.")
            else:
                print("No data for 'Initializations (Current Values)' sheet.")
            if df_declarations.empty and df_initializations.empty:
                print("No data written to Excel as both datasets are empty.")
            else:
                print(f"\nSuccessfully generated Excel comparison: {output_excel_path}")
    except Exception as e:
        print(f"Error writing Excel file {output_excel_path}: {e}")
def main_comparator():
    """Entry point: locate the three input files and produce the Excel comparison."""
    print("S7 Data Block Comparator to Excel (Multi-Sheet)")
    print("==============================================")
    working_dir = find_working_directory_from_x1()
    print(f"Using working directory: {working_dir}")
    data_f, format_f, updated_f = find_comparison_files_detailed(working_dir)
    # Proceed only if at least one relevant file was found.
    if not any([data_f, format_f, updated_f]):
        print("\nError: Could not find a sufficient set of input files (_data, _format, _updated). Exiting.")
        return
    output_excel_file = os.path.join(working_dir, "S7_DB_Comparison_MultiSheet.xlsx")
    generate_excel_comparison(data_f, format_f, updated_f, output_excel_file)


if __name__ == "__main__":
    main_comparator()