303 lines
11 KiB
Python
303 lines
11 KiB
Python
"""
|
|
Script that merges the changes generated by an LLM into a C# source file.
|
|
"""
|
|
|
|
import difflib
import json
import os
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Tuple
|
|
|
|
# Force UTF-8 on standard output.
# sys.stdout can be None or may have been replaced by a stream object that
# has no reconfigure() method (io.StringIO, for instance), so only call it
# when it is actually available instead of failing at import time.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
|
|
|
|
@dataclass
class CodeSection:
    """A node of the section tree built from a C# source text.

    Attributes:
        type: Section kind, e.g. ``'using'``, ``'class'`` or ``'root'``.
        name: Identifier captured for the section.
        content: Source text of the section (one or more lines joined by
            ``'\\n'``).
        start_line: Zero-based index of the first line of the section.
        end_line: Zero-based index of the last line of the section.
        parent: Enclosing section, or ``None``.
        children: Sections nested under this one; ``None`` is normalised to
            a fresh empty list.
        attributes: Attribute lines attached to the section; ``None`` is
            normalised to a fresh empty list.
        original_indent: Leading whitespace of the section's first line.
    """

    type: str
    name: str
    content: str
    start_line: int
    end_line: int
    # ``parent`` points back at a node whose ``children`` contains this very
    # section.  Leaving it in the generated __eq__ makes the comparison of two
    # equal trees bounce between parent and child until RecursionError, so the
    # back-reference is excluded from comparison (and from repr, to keep the
    # output readable).
    parent: Optional['CodeSection'] = field(default=None, repr=False, compare=False)
    children: Optional[List['CodeSection']] = None
    attributes: Optional[List[str]] = None
    original_indent: str = ""

    def __post_init__(self):
        """Replace missing ``children`` / ``attributes`` with new empty lists."""
        # Done here rather than through a shared default so every instance
        # owns its lists and an explicit ``None`` argument is still accepted.
        if self.children is None:
            self.children = []
        if self.attributes is None:
            self.attributes = []
|
|
|
|
class CSharpParser:
    """Line-oriented, regex-based splitter for C# source text.

    ``parse_file`` walks the text once and records every recognised line as a
    ``CodeSection`` directly under a synthetic ``root`` node.  Namespaces,
    classes, interfaces and regions are captured as opaque blocks: their whole
    text, header through terminator, is stored in ``content`` and the members
    inside them are not parsed individually.

    Attributes:
        section_order: Section types in the order they are tried against a
            line; also the ranking returned by ``get_section_order_index``.
        patterns: Regular expression source for each section type.
        placeholder_pattern: Regular expression for the
            ``// ... resto del código ...`` marker; it is not used inside
            this class.
    """

    # Section types whose text spans several lines and is collected by
    # _process_block instead of being taken from a single line.
    _BLOCK_TYPES = ('namespace', 'class', 'interface', 'region')

    def __init__(self):
        # Explicit ordering of the section types.
        self.section_order = [
            'using',
            'comment',
            'attribute',
            'namespace',
            'class',
            'interface',
            'region',
            'field',
            'property',
            'method'
        ]

        self.patterns = {
            'using': r'^\s*using\s+([^;]+);',
            'namespace': r'^\s*namespace\s+([^\s{]+)',
            'class': r'^\s*(?:public|private|internal|protected)?\s*(?:partial\s+)?(?:abstract\s+)?class\s+(\w+)',
            'interface': r'^\s*(?:public|private|internal|protected)?\s*interface\s+(\w+)',
            'method': r'^\s*(?:public|private|internal|protected)?\s*(?:virtual|override|static|async)?\s*[\w<>]+\s+(\w+)\s*\(',
            'property': r'^\s*(?:\[.+\]\s*)*(?:public|private|internal|protected)?\s*[\w<>]+\s+(\w+)\s*(?:{\s*get;|=>)',
            'field': r'^\s*(?:public|private|internal|protected)?\s*(?:readonly|static|const)?\s*[\w<>]+\s+(\w+)\s*(?:=|;)',
            'attribute': r'^\s*\[([^\]]+)\]',
            'comment': r'^\s*(?://.*|/\*.*?\*/)',
            'region': r'^\s*#region\s+(.+)$'
        }
        self.placeholder_pattern = r'//\s*\.\.\.\s*resto del código\s*\.\.\.'

    def get_section_order_index(self, section_type: str) -> int:
        """Return the rank of ``section_type`` in ``section_order``.

        Unknown types rank after every known type.
        """
        try:
            return self.section_order.index(section_type)
        except ValueError:
            return len(self.section_order)

    def get_indent(self, line: str) -> str:
        """Return the leading whitespace of ``line`` (possibly empty)."""
        match = re.match(r'^(\s*)', line)
        return match.group(1) if match else ""

    def _match_section(self, line: str) -> 'Tuple[Optional[str], Optional[re.Match]]':
        """Return ``(section_type, match)`` for the first pattern matching ``line``.

        Patterns are tried in ``section_order``; ``(None, None)`` is returned
        when none of them matches.
        """
        for section_type in self.section_order:
            pattern = self.patterns.get(section_type)
            if pattern is None:
                continue
            match = re.match(pattern, line)
            if match:
                return section_type, match
        return None, None

    def parse_file(self, content: str) -> 'CodeSection':
        """Split ``content`` into sections and return the ``root`` node.

        Blank lines and lines that match no pattern are skipped.  Every
        recognised section becomes a direct child of ``root``: a block's
        closing line is consumed together with the block, so nothing that
        follows it belongs inside it.

        Args:
            content: Full C# source text.

        Returns:
            A ``CodeSection`` of type ``'root'`` whose ``children`` are the
            recognised sections in source order.
        """
        lines = content.split('\n')
        root = CodeSection('root', '', '', 0, len(lines))

        i = 0
        while i < len(lines):
            line = lines[i]

            # Skip empty lines.
            if not line.strip():
                i += 1
                continue

            section_type, match = self._match_section(line)
            if match is None:
                i += 1
                continue

            # Not every pattern has a capture group (the comment pattern has
            # none); fall back to the matched text so such lines do not raise
            # "no such group".
            name = match.group(1) if match.re.groups else match.group(0).strip()

            if section_type in self._BLOCK_TYPES:
                # Multi-line section: collect it through its terminator.
                section_lines, end = self._process_block(lines, i)
                text = '\n'.join(section_lines)
            else:
                # Single-line section.
                text, end = line, i

            root.children.append(CodeSection(
                section_type,
                name,
                text,
                i,
                end,
                parent=root,
                attributes=[],
                original_indent=self.get_indent(line)
            ))
            i = end + 1

        return root

    def _process_block(self, lines: List[str], start_index: int) -> Tuple[List[str], int]:
        """Collect the lines of the block that starts at ``start_index``.

        A ``#region`` block runs to its matching ``#endregion``.  Any other
        block runs from its header to the line on which the braces opened
        after the header are balanced again; a header statement that ends
        with ``;`` before any brace was opened (``namespace Foo;``) is a
        one-line block.  Braces are counted textually, so braces inside
        string literals or comments are counted as well.

        Args:
            lines: All lines of the file.
            start_index: Index of the block's first line.

        Returns:
            ``(block_lines, last_index)``.  ``last_index`` is the index of
            the block's final line, or ``len(lines)`` when the block is not
            terminated before the end of the file.
        """
        if start_index < len(lines) and lines[start_index].lstrip().startswith('#region'):
            return self._process_region(lines, start_index)

        depth = 0
        opened = False
        section_lines = []
        j = start_index

        while j < len(lines):
            current_line = lines[j]
            section_lines.append(current_line)
            opening = current_line.count('{')
            depth += opening - current_line.count('}')
            opened = opened or opening > 0

            if opened:
                # Only a brace that was actually opened can close the block;
                # a brace-free header or blank line must not end it.
                if depth <= 0:
                    break
            elif current_line.rstrip().endswith(';'):
                break

            j += 1

        return section_lines, j

    def _process_region(self, lines: List[str], start_index: int) -> Tuple[List[str], int]:
        """Collect a ``#region`` block up to its matching ``#endregion``."""
        depth = 0
        for j in range(start_index, len(lines)):
            directive = lines[j].lstrip()
            if directive.startswith('#region'):
                depth += 1
            elif directive.startswith('#endregion'):
                depth -= 1
                if depth == 0:
                    return lines[start_index:j + 1], j
        return lines[start_index:], len(lines)
|
|
|
|
class CSharpCodeMerger:
    """Merge an LLM-produced version of a C# file into the original file.

    Both texts are parsed with ``CSharpParser`` and their section trees are
    combined: sections are paired by type, name and position among equally
    named siblings; unpaired sections from either side are kept.

    NOTE(review): for a paired namespace/class/interface/region the merged
    node keeps the *original* ``content`` and only its children are merged,
    so a change made by the LLM inside such a block reaches the output only
    if the parser exposes the block's members as children - confirm that
    this is the intended behaviour.
    """

    def __init__(self, original_code: str, llm_code: str):
        """Parse both versions and keep their texts for diffing."""
        self.parser = CSharpParser()
        self.original_tree = self.parser.parse_file(original_code)
        self.llm_tree = self.parser.parse_file(llm_code)
        self.original_code = original_code
        self.llm_code = llm_code

    def _sort_sections(self, sections: List['CodeSection']) -> List['CodeSection']:
        """Return ``sections`` ordered by section-type rank, then start line."""
        return sorted(sections, key=lambda x: (
            self.parser.get_section_order_index(x.type),
            x.start_line
        ))

    @staticmethod
    def _index_children(children: List['CodeSection']) -> dict:
        """Map ``(type, name, occurrence)`` to each child, in order.

        The occurrence counter keeps siblings that share type and name
        (method overloads, repeated lines) as separate entries instead of
        letting the later one replace the earlier one.
        """
        seen = {}
        indexed = {}
        for child in children:
            base = (child.type, child.name)
            occurrence = seen.get(base, 0)
            seen[base] = occurrence + 1
            indexed[base + (occurrence,)] = child
        return indexed

    def _merge_sections(self, original: 'CodeSection', llm: 'CodeSection') -> 'CodeSection':
        """Merge two sections that represent the same container.

        Args:
            original: Section from the original file.
            llm: Matching section from the LLM file.

        Returns:
            A new section carrying the original's own fields and the merged,
            sorted children.
        """
        merged = CodeSection(
            original.type,
            original.name,
            original.content,
            original.start_line,
            original.end_line,
            original.parent,
            # Keep the container's attribute lines; leaf sections keep theirs
            # because the section object itself is reused.
            attributes=list(original.attributes),
            original_indent=original.original_indent
        )

        # Index the children of both sides by type, name and occurrence.
        original_children = self._index_children(original.children)
        llm_children = self._index_children(llm.children)

        merged_children = []

        # Children present in the original.
        for key, orig_child in original_children.items():
            llm_child = llm_children.get(key)
            if llm_child is None:
                merged_children.append(orig_child)
            elif orig_child.type in ['namespace', 'class', 'interface', 'region']:
                merged_children.append(self._merge_sections(orig_child, llm_child))
            elif orig_child.content != llm_child.content:
                # A changed leaf section is taken from the LLM version.
                merged_children.append(llm_child)
            else:
                merged_children.append(orig_child)

        # Children that only exist in the LLM version.
        merged_children.extend(
            llm_child for key, llm_child in llm_children.items()
            if key not in original_children
        )

        # Order the children according to the configured section order.
        merged.children = self._sort_sections(merged_children)
        return merged

    def _generate_code(self, section: 'CodeSection', indent_level: int = 0) -> str:
        """Render ``section`` and its children back to source text.

        Attribute lines come first, then the section's own content with
        blank lines removed, then every child in sorted order.  A section
        that recorded an ``original_indent`` is emitted at that indentation;
        otherwise one space per ``indent_level`` is used.

        Args:
            section: Section to render; a ``'root'`` section contributes
                only its children.
            indent_level: Nesting depth used when the section has no
                recorded indentation.

        Returns:
            The rendered text, lines joined by ``'\\n'``.
        """
        lines = []
        source_indent = section.original_indent
        base_indent = source_indent or " " * indent_level

        # Attribute lines.
        for attr in section.attributes:
            lines.append(base_indent + attr.lstrip())

        if section.type != 'root':
            header, *body = section.content.split('\n')
            lines.append(base_indent + header.lstrip())

            # Body lines still carry their full indentation from the source
            # file.  When the section was indented there, base_indent equals
            # that indentation, so adding it again would indent the body
            # twice; only a section that started at column 0 is shifted.
            shift = "" if source_indent else base_indent
            lines.extend(shift + line for line in body if line.strip())

        if section.children:
            child_level = indent_level + 1 if section.type != 'root' else 0
            for child in self._sort_sections(section.children):
                child_code = self._generate_code(child, child_level)
                if child_code:
                    lines.append(child_code)

        return '\n'.join(lines)

    def merge_code(self) -> str:
        """Return the merged source text."""
        merged_tree = self._merge_sections(self.original_tree, self.llm_tree)
        return self._generate_code(merged_tree)

    @staticmethod
    def _unified_diff(original: str, merged: str) -> str:
        """Return a unified diff of two texts, or ``''`` when none differ.

        Lines are compared without their terminators and the output lines
        are joined explicitly, so a text whose last line lacks a trailing
        newline cannot glue two diff lines together.
        """
        diff_lines = list(difflib.unified_diff(
            original.splitlines(),
            merged.splitlines(),
            fromfile='original',
            tofile='merged',
            lineterm=''
        ))
        if not diff_lines:
            return ''
        return '\n'.join(diff_lines) + '\n'

    def generate_diff(self) -> str:
        """Return a unified diff from the original text to the merged text."""
        return self._unified_diff(self.original_code, self.merge_code())
|
|
|
|
def main():
    """Merge the configured files and write ``merged.cs`` and ``changes.diff``.

    Settings are read from the JSON object in the ``SCRIPT_CONFIGS``
    environment variable:

    * ``working_directory`` (default ``"."``): directory containing the inputs.
    * ``level3.input_file`` (default ``"original.cs"``): original C# file.
    * ``level3.llm_file`` (default ``"llm_generated.cs"``): LLM-generated file.
    * ``level3.output_directory`` (default ``"."``): where the results go.

    Every failure is reported with a printed message and the function
    returns ``None`` in all cases.
    """
    try:
        configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}"))
    except json.JSONDecodeError as e:
        # Report a malformed configuration like every other error instead of
        # ending with a traceback.
        print(f"Error: SCRIPT_CONFIGS is not valid JSON: {e}")
        return

    if not isinstance(configs, dict):
        print("Error: SCRIPT_CONFIGS must be a JSON object")
        return

    working_directory = configs.get("working_directory", ".")
    work_config = configs.get("level3", {})
    if not isinstance(work_config, dict):
        print("Error: 'level3' in SCRIPT_CONFIGS must be a JSON object")
        return

    input_file = work_config.get("input_file", "original.cs")
    llm_file = work_config.get("llm_file", "llm_generated.cs")
    output_directory = work_config.get("output_directory", ".")

    input_path = os.path.join(working_directory, input_file)
    llm_path = os.path.join(working_directory, llm_file)
    output_merged = os.path.join(output_directory, "merged.cs")
    output_diff = os.path.join(output_directory, "changes.diff")

    for path in (input_path, llm_path):
        if not os.path.exists(path):
            print(f"Error: File {path} does not exist")
            return

    try:
        # Creating the output directory can fail too, so it is covered by the
        # same error handling as the rest of the file work.
        os.makedirs(output_directory, exist_ok=True)

        with open(input_path, "r", encoding="utf-8") as f:
            original_code = f.read()
        with open(llm_path, "r", encoding="utf-8") as f:
            llm_code = f.read()

        merger = CSharpCodeMerger(original_code, llm_code)
        merged_code = merger.merge_code()
        # Build the diff before writing anything so a failure here does not
        # leave a merged file next to an empty diff file.
        diff_text = merger.generate_diff()

        with open(output_merged, "w", encoding="utf-8") as f:
            f.write(merged_code)
        with open(output_diff, "w", encoding="utf-8") as f:
            f.write(diff_text)

        print("Successfully processed files:")
        print(f"- Merged code saved to: {output_merged}")
        print(f"- Diff file saved to: {output_diff}")

    except Exception as e:
        # Top-level boundary of the script: report the problem and stop.
        print(f"Error processing files: {str(e)}")
        return


if __name__ == "__main__":
    main()