# ParamManagerScripts/backend/script_groups/CSharpCodeMerger/x1.py

# 303 lines
# 11 KiB
# Python

"""
Script para hacer una union de los cambios generados por un LLM en un archivo de código C#.
"""
import difflib
import json
import os
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Force UTF-8 on standard output so non-ASCII text in the messages printed by
# this script is not mangled by the console's default encoding.
# sys.stdout may be None or may have been replaced by an object without
# ``reconfigure``; in that case the stream is left untouched instead of
# failing at import time.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
@dataclass
class CodeSection:
    """One node of the section tree built from a C# source text.

    Positional field order is part of the interface: callers construct
    instances as ``CodeSection(type, name, content, start_line, end_line,
    parent, ...)``.
    """

    # Section kind label, e.g. 'root', 'using', 'class', 'method'.
    type: str
    # Identifier used together with ``type`` to match sections across files.
    name: str
    # Source text of the section (lines joined with '\n').
    content: str
    # Line positions assigned by whoever builds the node.
    start_line: int
    end_line: int
    # Back-reference to the owning node. It is excluded from comparison and
    # repr: parent -> children -> parent is a cycle, and comparing two equal
    # but distinct trees through it would recurse without bound.
    parent: Optional['CodeSection'] = field(default=None, repr=False, compare=False)
    # Each instance gets its own fresh list.
    children: List['CodeSection'] = field(default_factory=list)
    attributes: List[str] = field(default_factory=list)
    # Leading whitespace of the section's first line in its source file.
    original_indent: str = ""

    def __post_init__(self):
        # Explicit ``None`` was historically accepted for both list fields;
        # keep normalising it so existing callers continue to work.
        if self.children is None:
            self.children = []
        if self.attributes is None:
            self.attributes = []
class CSharpParser:
    """Split C# source text into top-level sections using per-line regexes.

    This is a heuristic, line-oriented scanner, not a C# grammar. Each
    recognised declaration becomes one ``CodeSection`` that carries its
    complete source text; members nested inside a namespace/class/interface
    are NOT expanded into child sections, they stay inside the text of the
    enclosing block.
    """

    # Section kinds whose text may continue past the line they start on.
    _BLOCK_TYPES = frozenset({
        'namespace', 'class', 'interface', 'region',
        'field', 'property', 'method',
    })

    # Parts of a line that must not take part in brace counting or in the
    # "ends with ';'" check.
    _NON_CODE = re.compile(
        r'"(?:\\.|[^"\\])*"'    # double-quoted string literal
        r"|'(?:\\.|[^'\\])*'"   # character literal
        r"|//.*"                # line comment
        r"|/\*.*?\*/"           # block comment closed on the same line
    )

    def __init__(self):
        # Fixed order of section kinds. It is both the order in which the
        # patterns are tried against a line and the order used to sort
        # sections in the generated output.
        self.section_order = [
            'using',
            'comment',
            'attribute',
            'namespace',
            'class',
            'interface',
            'region',
            'field',
            'property',
            'method'
        ]
        self.patterns = {
            'using': r'^\s*using\s+([^;]+);',
            'namespace': r'^\s*namespace\s+([^\s{]+)',
            'class': r'^\s*(?:public|private|internal|protected)?\s*(?:partial\s+)?(?:abstract\s+)?class\s+(\w+)',
            'interface': r'^\s*(?:public|private|internal|protected)?\s*interface\s+(\w+)',
            'method': r'^\s*(?:public|private|internal|protected)?\s*(?:virtual|override|static|async)?\s*[\w<>]+\s+(\w+)\s*\(',
            'property': r'^\s*(?:\[.+\]\s*)*(?:public|private|internal|protected)?\s*[\w<>]+\s+(\w+)\s*(?:{\s*get;|=>)',
            'field': r'^\s*(?:public|private|internal|protected)?\s*(?:readonly|static|const)?\s*[\w<>]+\s+(\w+)\s*(?:=|;)',
            'attribute': r'^\s*\[([^\]]+)\]',
            'comment': r'^\s*(?://.*|/\*.*?\*/)',
            'region': r'^\s*#region\s+(.+)$'
        }
        # Marker for elided code ("... rest of the code ..."). It is defined
        # here but not used by this class.
        self.placeholder_pattern = r'//\s*\.\.\.\s*resto del código\s*\.\.\.'

    def get_section_order_index(self, section_type: str) -> int:
        """Return the sort rank of ``section_type``; unknown kinds rank last."""
        try:
            return self.section_order.index(section_type)
        except ValueError:
            return len(self.section_order)

    def get_indent(self, line: str) -> str:
        """Return the leading whitespace of ``line`` ('' when there is none)."""
        match = re.match(r'^(\s*)', line)
        return match.group(1) if match else ""

    def parse_file(self, content: str) -> "CodeSection":
        """Parse ``content`` and return the root of the section tree.

        The root's children are the recognised sections in source order.
        Every child stores the full text of its declaration in ``content``,
        with 0-based ``start_line``/``end_line`` indexes into
        ``content.split('\\n')``. Blank lines and lines that match no pattern
        are skipped and are not recorded anywhere.
        """
        lines = content.split('\n')
        root = CodeSection('root', '', '', 0, len(lines))
        index = 0
        while index < len(lines):
            line = lines[index]
            classified = self._classify_line(line) if line.strip() else None
            if classified is None:
                index += 1
                continue
            section_type, name = classified
            if section_type in self._BLOCK_TYPES:
                block_lines, end = self._process_block(lines, index)
            else:
                block_lines, end = [line], index
            # A block is consumed whole, so it never becomes the "current
            # context": every section is a direct child of the root.
            root.children.append(CodeSection(
                section_type,
                name,
                '\n'.join(block_lines),
                index,
                end,
                parent=root,
                original_indent=self.get_indent(line)
            ))
            index = end + 1
        return root

    def _classify_line(self, line: str) -> Optional[Tuple[str, str]]:
        """Return ``(section_type, name)`` for the first matching pattern.

        Patterns are tried in ``section_order``. Returns ``None`` when no
        pattern matches.
        """
        for section_type in self.section_order:
            pattern = self.patterns.get(section_type)
            if pattern is None:
                continue
            match = re.match(pattern, line)
            if match:
                # Not every pattern has a capture group (the comment pattern
                # has none); fall back to the matched text as the name.
                name = match.group(1) if match.re.groups else match.group(0).strip()
                return section_type, name
        return None

    def _process_block(self, lines: List[str], start_index: int) -> Tuple[List[str], int]:
        """Collect the lines of the block that starts at ``start_index``.

        Returns ``(block_lines, end_index)`` where ``end_index`` is the index
        of the last line included. A ``#region`` block runs to its matching
        ``#endregion``; any other block runs until its braces balance, or to
        the first line ending in ';' when no brace has been opened.
        """
        if start_index >= len(lines):
            return [], start_index
        if lines[start_index].lstrip().startswith('#region'):
            end = self._find_region_end(lines, start_index)
        else:
            end = self._find_brace_block_end(lines, start_index)
        return lines[start_index:end + 1], end

    def _find_region_end(self, lines: List[str], start_index: int) -> int:
        """Return the index of the ``#endregion`` matching the ``#region`` at ``start_index``."""
        depth = 0
        for idx in range(start_index, len(lines)):
            directive = lines[idx].lstrip()
            if directive.startswith('#region'):
                depth += 1
            elif directive.startswith('#endregion'):
                depth -= 1
                if depth == 0:
                    return idx
        # Unterminated region: keep only the directive line so the rest of
        # the file is still scanned line by line.
        return start_index

    def _find_brace_block_end(self, lines: List[str], start_index: int) -> int:
        """Return the index of the last line of the declaration at ``start_index``."""
        depth = 0
        opened = False
        for idx in range(start_index, len(lines)):
            code = self._NON_CODE.sub('', lines[idx])
            opens = code.count('{')
            depth += opens - code.count('}')
            if depth < 0:
                # This closing brace belongs to an enclosing scope.
                return max(start_index, idx - 1)
            opened = opened or opens > 0
            # Before any '{' a declaration can only end with ';' (e.g. a
            # field, an abstract method, a file-scoped namespace).
            if depth == 0 and (opened or code.rstrip().endswith(';')):
                return idx
        # Braces never balanced: the block runs to the end of the input.
        return len(lines) - 1
class CSharpCodeMerger:
    """Merge an LLM-produced version of a C# file into the original file.

    Both texts are parsed into section trees; the trees are merged by
    ``(type, name)`` and the result is rendered back to text.
    """

    def __init__(self, original_code: str, llm_code: str):
        self.parser = CSharpParser()
        self.original_code = original_code
        self.llm_code = llm_code
        self.original_tree = self.parser.parse_file(original_code)
        self.llm_tree = self.parser.parse_file(llm_code)

    def _sort_sections(self, sections: List["CodeSection"]) -> List["CodeSection"]:
        """Return ``sections`` ordered by section kind, then by start line."""
        return sorted(sections, key=lambda x: (
            self.parser.get_section_order_index(x.type),
            x.start_line
        ))

    def _merge_sections(self, original: "CodeSection", llm: "CodeSection") -> "CodeSection":
        """Merge the children of ``llm`` into a copy of ``original``.

        The returned node keeps the original's own text and attributes.
        Children are matched by ``(type, name)``:

        * present in both, container kind -> merged recursively;
        * present in both, other kind -> the LLM child when the text differs;
        * only in the original -> kept;
        * only in the LLM version -> added.
        """
        merged = CodeSection(
            original.type,
            original.name,
            original.content,
            original.start_line,
            original.end_line,
            original.parent,
            attributes=list(original.attributes),
            original_indent=original.original_indent
        )
        # Index children by kind and name.
        # NOTE(review): siblings sharing the same (type, name) collapse to the
        # last one here (e.g. overloads); confirm whether that is acceptable.
        original_children = {(c.type, c.name): c for c in original.children}
        llm_children = {(c.type, c.name): c for c in llm.children}
        merged_children = []
        # Children that exist in the original.
        for key, orig_child in original_children.items():
            llm_child = llm_children.get(key)
            if llm_child is None:
                merged_children.append(orig_child)
            elif orig_child.type in ['namespace', 'class', 'interface', 'region']:
                # NOTE(review): the merged container keeps the ORIGINAL text;
                # only its parsed children can pick up LLM changes.
                merged_children.append(self._merge_sections(orig_child, llm_child))
            else:
                merged_children.append(llm_child if orig_child.content != llm_child.content else orig_child)
        # Children introduced by the LLM version.
        for key, llm_child in llm_children.items():
            if key not in original_children:
                merged_children.append(llm_child)
        merged.children = self._sort_sections(merged_children)
        return merged

    def _generate_code(self, section: "CodeSection", indent_level: int = 0) -> str:
        """Render ``section`` and its children back to source text.

        The section's own indentation is reused when it has one; otherwise
        one space per ``indent_level`` is used. Blank lines inside a
        section's text are not emitted.
        """
        lines = []
        source_indent = section.original_indent
        base_indent = source_indent or " " * indent_level
        # Attribute lines go right before the section they belong to.
        for attr in section.attributes:
            lines.append(base_indent + attr.lstrip())
        if section.type != 'root':
            content_lines = section.content.split('\n')
            lines.append(base_indent + content_lines[0].lstrip())
            for line in content_lines[1:]:
                if not line.strip():
                    continue
                if not source_indent:
                    relative = line
                elif line.startswith(source_indent):
                    # The line already carries the section's indentation;
                    # strip it so it is not applied twice.
                    relative = line[len(source_indent):]
                else:
                    # Indented less than (or differently from) the header:
                    # align it with the header.
                    relative = line.lstrip()
                lines.append(base_indent + relative)
        if section.children:
            child_level = indent_level + 1 if section.type != 'root' else 0
            for child in self._sort_sections(section.children):
                child_code = self._generate_code(child, child_level)
                if child_code:
                    lines.append(child_code)
        return '\n'.join(lines)

    def merge_code(self) -> str:
        """Return the merged source text."""
        merged_tree = self._merge_sections(self.original_tree, self.llm_tree)
        return self._generate_code(merged_tree)

    def generate_diff(self) -> str:
        """Return a unified diff from the original text to the merged text.

        Returns an empty string when there is no difference.
        """
        merged = self.merge_code()
        # The merged text never ends with a newline, so compare newline-free
        # lines and add the terminators afterwards; otherwise the last diff
        # lines would be glued together.
        diff_lines = difflib.unified_diff(
            self.original_code.splitlines(),
            merged.splitlines(),
            fromfile='original',
            tofile='merged',
            lineterm=''
        )
        text = '\n'.join(diff_lines)
        return text + '\n' if text else ''
def main():
    """Merge the configured C# files and write the result plus a diff.

    Configuration is read from the ``SCRIPT_CONFIGS`` environment variable (a
    JSON object): ``working_directory`` and, under ``level3``,
    ``input_file``, ``llm_file`` and ``output_directory``. Every failure is
    reported with a printed message and the function returns ``None``.
    """
    raw_configs = os.environ.get("SCRIPT_CONFIGS") or "{}"
    try:
        configs = json.loads(raw_configs)
    except json.JSONDecodeError as e:
        print(f"Error: SCRIPT_CONFIGS is not valid JSON: {e}")
        return
    if not isinstance(configs, dict):
        print("Error: SCRIPT_CONFIGS must be a JSON object")
        return
    # ``or`` also covers keys that are present but null/empty.
    working_directory = configs.get("working_directory") or "."
    work_config = configs.get("level3") or {}
    if not isinstance(work_config, dict):
        print("Error: 'level3' in SCRIPT_CONFIGS must be a JSON object")
        return
    input_file = work_config.get("input_file") or "original.cs"
    llm_file = work_config.get("llm_file") or "llm_generated.cs"
    output_directory = work_config.get("output_directory") or "."
    input_path = os.path.join(working_directory, input_file)
    llm_path = os.path.join(working_directory, llm_file)
    # NOTE(review): unlike the inputs, the output directory is not joined
    # with working_directory - confirm this is intended.
    output_merged = os.path.join(output_directory, "merged.cs")
    output_diff = os.path.join(output_directory, "changes.diff")
    for path in [input_path, llm_path]:
        if not os.path.exists(path):
            print(f"Error: File {path} does not exist")
            return
    try:
        os.makedirs(output_directory, exist_ok=True)
        with open(input_path, "r", encoding="utf-8") as f:
            original_code = f.read()
        with open(llm_path, "r", encoding="utf-8") as f:
            llm_code = f.read()
        merger = CSharpCodeMerger(original_code, llm_code)
        merged_code = merger.merge_code()
        with open(output_merged, "w", encoding="utf-8") as f:
            f.write(merged_code)
        with open(output_diff, "w", encoding="utf-8") as f:
            f.write(merger.generate_diff())
        print("Successfully processed files:")
        print(f"- Merged code saved to: {output_merged}")
        print(f"- Diff file saved to: {output_diff}")
    except Exception as e:
        # Top-level boundary of the script: report the failure, no traceback.
        print(f"Error processing files: {str(e)}")
        return


if __name__ == "__main__":
    main()