# ParamManagerScripts/backend/script_groups/CSharpCodeMerger/x1.py

"""
Script that merges the changes generated by an LLM into a C# source file.
"""

import difflib
import json
import os
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Force UTF-8 encoding on standard output
sys.stdout.reconfigure(encoding="utf-8")

@dataclass
class CodeSection:
    """A node of the section tree built from a C# source file.

    Fields (positional order is part of the interface):
        type: Section kind, e.g. ``'root'``, ``'using'``, ``'class'``.
        name: Identifier captured for the section (empty for the root).
        content: Source text of the section.
        start_line: Index of the first line of the section.
        end_line: Index of the last line of the section.
        parent: Enclosing section, or ``None`` for the root.
        children: Sections nested under this one.
        attributes: Attribute lines attached to the section.
        original_indent: Leading whitespace of the section's first line.
    """

    type: str
    name: str
    content: str
    start_line: int
    end_line: int
    # Excluded from equality: a parent lists its children and every child
    # points back at its parent, so comparing ``parent`` would recurse
    # forever when two equal trees are compared.
    parent: Optional['CodeSection'] = field(default=None, compare=False)
    children: List['CodeSection'] = field(default_factory=list)
    attributes: List[str] = field(default_factory=list)
    original_indent: str = ""

    def __post_init__(self):
        # An explicit ``None`` is still accepted and normalised to a fresh
        # list, as before.
        if self.children is None:
            self.children = []
        if self.attributes is None:
            self.attributes = []

class CSharpParser:
    """Regex-based, line-oriented splitter of C# source into ``CodeSection`` nodes.

    Each non-blank line is tested against ``patterns`` in ``section_order``;
    the first pattern that matches decides the section type. Namespaces,
    classes, interfaces and regions are captured as whole multi-line blocks;
    every other section is a single line. Lines that match no pattern are
    skipped.
    """

    # Section types captured as a multi-line block rather than a single line.
    _BLOCK_TYPES = ('namespace', 'class', 'interface', 'region')

    def __init__(self):
        # Order in which the patterns are tried; it is also the ranking
        # returned by get_section_order_index.
        self.section_order = [
            'using',
            'comment',
            'attribute',
            'namespace',
            'class',
            'interface',
            'region',
            'field',
            'property',
            'method'
        ]

        self.patterns = {
            'using': r'^\s*using\s+([^;]+);',
            'namespace': r'^\s*namespace\s+([^\s{]+)',
            'class': r'^\s*(?:public|private|internal|protected)?\s*(?:partial\s+)?(?:abstract\s+)?class\s+(\w+)',
            'interface': r'^\s*(?:public|private|internal|protected)?\s*interface\s+(\w+)',
            'method': r'^\s*(?:public|private|internal|protected)?\s*(?:virtual|override|static|async)?\s*[\w<>]+\s+(\w+)\s*\(',
            'property': r'^\s*(?:\[.+\]\s*)*(?:public|private|internal|protected)?\s*[\w<>]+\s+(\w+)\s*(?:{\s*get;|=>)',
            'field': r'^\s*(?:public|private|internal|protected)?\s*(?:readonly|static|const)?\s*[\w<>]+\s+(\w+)\s*(?:=|;)',
            'attribute': r'^\s*\[([^\]]+)\]',
            'comment': r'^\s*(?://.*|/\*.*?\*/)',
            'region': r'^\s*#region\s+(.+)$'
        }
        self.placeholder_pattern = r'//\s*\.\.\.\s*resto del código\s*\.\.\.'

    def get_section_order_index(self, section_type: str) -> int:
        """Return the rank of ``section_type`` in ``section_order``.

        Unknown types rank after every known one.
        """
        try:
            return self.section_order.index(section_type)
        except ValueError:
            return len(self.section_order)

    def get_indent(self, line: str) -> str:
        """Return the leading whitespace of ``line``."""
        match = re.match(r'^(\s*)', line)
        return match.group(1) if match else ""

    def _match_section(self, line: str) -> Optional[Tuple[str, str]]:
        """Return ``(section_type, name)`` for the first pattern matching ``line``.

        Patterns are tried in ``section_order``. Returns ``None`` when no
        pattern matches.
        """
        for section_type in self.section_order:
            pattern = self.patterns.get(section_type)
            if pattern is None:
                continue

            match = re.match(pattern, line)
            if not match:
                continue

            if match.re.groups:
                name = match.group(1)
            else:
                # The 'comment' pattern has no capturing group, so asking
                # for group(1) would raise IndexError; use the matched text
                # itself as the section name.
                name = match.group(0).strip()
            return section_type, name

        return None

    def parse_file(self, content: str) -> 'CodeSection':
        """Split ``content`` into sections and return the root node.

        The root's children are the recognised sections in file order, each
        with the root as parent. Block sections (namespace, class,
        interface, region) are consumed whole: their full text is stored in
        ``content`` and their inner lines are not parsed into child nodes.
        """
        lines = content.split('\n')
        root = CodeSection('root', '', '', 0, len(lines))

        i = 0
        while i < len(lines):
            line = lines[i]

            # Blank lines never start a section.
            if not line.strip():
                i += 1
                continue

            found = self._match_section(line)
            if found is None:
                i += 1
                continue

            section_type, name = found
            if section_type in self._BLOCK_TYPES:
                section_lines, end = self._process_block(lines, i)
                text = '\n'.join(section_lines)
            else:
                end = i
                text = line

            # Every section hangs off the root. A block's lines are consumed
            # in full here, so whatever follows it is a sibling, not a
            # member of it.
            root.children.append(CodeSection(
                section_type,
                name,
                text,
                i,
                end,
                parent=root,
                attributes=[],
                original_indent=self.get_indent(line)
            ))
            i = end + 1

        return root

    def _process_block(self, lines: List[str], start_index: int) -> Tuple[List[str], int]:
        """Collect the lines of the block that starts at ``start_index``.

        Returns ``(block_lines, end_index)`` where ``end_index`` is the index
        of the last line of the block. A ``#region`` block runs to its
        matching ``#endregion``. Any other block runs to the line on which
        its braces balance again; braces are counted textually, so braces
        inside strings or comments are counted too.
        """
        if start_index >= len(lines):
            return [], start_index

        if lines[start_index].lstrip().startswith('#region'):
            return self._process_region(lines, start_index)

        section_lines = []
        depth = 0
        opened = False

        for j in range(start_index, len(lines)):
            current_line = lines[j]
            section_lines.append(current_line)

            opens = current_line.count('{')
            depth += opens - current_line.count('}')
            if opens:
                opened = True

            # Only a block that has actually opened can close; this keeps
            # blank lines between the header and '{' inside the block and
            # lets a one-line 'class A { }' end on its own line.
            if opened and depth <= 0:
                return section_lines, j

            # A braceless declaration terminated by ';' on the header line
            # (e.g. 'namespace N;') has no body to collect.
            if not opened and j == start_index and current_line.rstrip().endswith(';'):
                return section_lines, j

        # Unbalanced input: the block runs to the end of the file. Report
        # the last valid line index rather than one past it.
        return section_lines, len(lines) - 1

    def _process_region(self, lines: List[str], start_index: int) -> Tuple[List[str], int]:
        """Collect a ``#region`` block through its matching ``#endregion``.

        Nested regions are tracked by depth. When no matching ``#endregion``
        exists, only the ``#region`` line itself is returned.
        """
        section_lines = []
        depth = 0

        for j in range(start_index, len(lines)):
            current_line = lines[j]
            section_lines.append(current_line)

            marker = current_line.lstrip()
            if marker.startswith('#region'):
                depth += 1
            elif marker.startswith('#endregion'):
                depth -= 1
                if depth <= 0:
                    return section_lines, j

        return [lines[start_index]], start_index

class CSharpCodeMerger:
    """Merge an LLM-produced variant of a C# file into the original file.

    Both texts are parsed into section trees. The trees are merged node by
    node and the result is rendered back to source text.
    """

    # Section types that are merged recursively instead of being replaced.
    _CONTAINER_TYPES = ('namespace', 'class', 'interface', 'region')

    def __init__(self, original_code: str, llm_code: str):
        """Parse both inputs and keep the raw texts for diffing."""
        self.parser = CSharpParser()
        self.original_tree = self.parser.parse_file(original_code)
        self.llm_tree = self.parser.parse_file(llm_code)
        self.original_code = original_code
        self.llm_code = llm_code

    def _sort_sections(self, sections: List['CodeSection']) -> List['CodeSection']:
        """Return ``sections`` ordered by section-type rank, then start line."""
        return sorted(sections, key=lambda x: (
            self.parser.get_section_order_index(x.type),
            x.start_line
        ))

    @staticmethod
    def _index_children(children: List['CodeSection']) -> Dict[tuple, 'CodeSection']:
        """Map ``(type, name, occurrence)`` to each child, in input order.

        The occurrence counter keeps sections that share a type and name
        (for example method overloads or repeated comment lines) as separate
        entries instead of letting the last one overwrite the others.
        """
        seen = {}
        indexed = {}
        for child in children:
            base = (child.type, child.name)
            occurrence = seen.get(base, 0)
            seen[base] = occurrence + 1
            indexed[base + (occurrence,)] = child
        return indexed

    def _merge_sections(self, original: 'CodeSection', llm: 'CodeSection') -> 'CodeSection':
        """Return a new section combining ``original`` with ``llm``.

        The merged node keeps the original's own fields. Children are paired
        by type, name and occurrence: container sections are merged
        recursively, other sections take the LLM version when its content
        differs, unmatched original children are kept, and children present
        only in ``llm`` are added.
        """
        merged = CodeSection(
            original.type,
            original.name,
            original.content,
            original.start_line,
            original.end_line,
            original.parent,
            attributes=list(original.attributes),
            original_indent=original.original_indent
        )

        original_children = self._index_children(original.children)
        llm_children = self._index_children(llm.children)

        merged_children = []

        # Walk the original children, taking the LLM counterpart when needed.
        for key, orig_child in original_children.items():
            llm_child = llm_children.get(key)
            if llm_child is None:
                merged_children.append(orig_child)
            elif orig_child.type in self._CONTAINER_TYPES:
                merged_children.append(self._merge_sections(orig_child, llm_child))
            elif orig_child.content != llm_child.content:
                merged_children.append(llm_child)
            else:
                merged_children.append(orig_child)

        # Add the children that only the LLM version has.
        merged_children.extend(
            llm_child
            for key, llm_child in llm_children.items()
            if key not in original_children
        )

        # Order the children by the defined section order.
        merged.children = self._sort_sections(merged_children)
        return merged

    def _generate_code(self, section: 'CodeSection', indent_level: int = 0) -> str:
        """Render ``section`` and its children back to source text.

        A section that recorded its own indentation is emitted with it;
        otherwise four spaces per ``indent_level`` are used. Blank lines
        inside a section's content are omitted.
        """
        lines = []
        base_indent = section.original_indent or "    " * indent_level

        # Attribute lines go first.
        for attr in section.attributes:
            lines.append(base_indent + attr.lstrip())

        if section.type != 'root':
            content_lines = section.content.split('\n')
            lines.append(base_indent + content_lines[0].lstrip())

            # Continuation lines already carry the section's own indentation
            # when one was recorded, so they must not be shifted again; only
            # a section without recorded indentation is shifted by the
            # nesting-derived indent.
            shift = "" if section.original_indent else base_indent
            for line in content_lines[1:]:
                if line.strip():
                    lines.append(shift + line)

        if section.children:
            child_level = indent_level + 1 if section.type != 'root' else 0
            for child in self._sort_sections(section.children):
                child_code = self._generate_code(child, child_level)
                if child_code:
                    lines.append(child_code)

        return '\n'.join(lines)

    def merge_code(self) -> str:
        """Return the merged source text of the two parsed inputs."""
        merged_tree = self._merge_sections(self.original_tree, self.llm_tree)
        return self._generate_code(merged_tree)

    def generate_diff(self) -> str:
        """Return a unified diff from the original text to the merged text."""
        merged = self.merge_code()
        diff = difflib.unified_diff(
            self.original_code.splitlines(keepends=True),
            merged.splitlines(keepends=True),
            fromfile='original',
            tofile='merged'
        )
        return ''.join(diff)

def main():
    """Merge the two configured C# files and write the merged file and diff.

    Settings come from the JSON in the ``SCRIPT_CONFIGS`` environment
    variable: ``working_directory`` at the top level, and ``input_file``,
    ``llm_file`` and ``output_directory`` under ``level3``. Problems are
    reported on standard output and the function returns without raising.
    """
    configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}"))
    working_directory = configs.get("working_directory", ".")
    work_config = configs.get("level3", {})

    output_directory = work_config.get("output_directory", ".")
    input_path = os.path.join(working_directory, work_config.get("input_file", "original.cs"))
    llm_path = os.path.join(working_directory, work_config.get("llm_file", "llm_generated.cs"))
    output_merged = os.path.join(output_directory, "merged.cs")
    output_diff = os.path.join(output_directory, "changes.diff")

    # Stop at the first input that is missing.
    missing = next(
        (candidate for candidate in (input_path, llm_path) if not os.path.exists(candidate)),
        None,
    )
    if missing is not None:
        print(f"Error: File {missing} does not exist")
        return

    os.makedirs(output_directory, exist_ok=True)

    try:
        sources = []
        for source_path in (input_path, llm_path):
            with open(source_path, "r", encoding="utf-8") as handle:
                sources.append(handle.read())

        merger = CSharpCodeMerger(sources[0], sources[1])
        merged_code = merger.merge_code()

        with open(output_merged, "w", encoding="utf-8") as handle:
            handle.write(merged_code)
        with open(output_diff, "w", encoding="utf-8") as handle:
            handle.write(merger.generate_diff())

        print("Successfully processed files:")
        print(f"- Merged code saved to: {output_merged}")
        print(f"- Diff file saved to: {output_diff}")
    except Exception as e:
        print(f"Error processing files: {str(e)}")

# Run the merge only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()