diff --git a/__pycache__/config_manager.cpython-310.pyc b/__pycache__/config_manager.cpython-310.pyc index ed725ab..669b292 100644 Binary files a/__pycache__/config_manager.cpython-310.pyc and b/__pycache__/config_manager.cpython-310.pyc differ diff --git a/app.py b/app.py index 36add94..4c342ec 100644 --- a/app.py +++ b/app.py @@ -221,5 +221,11 @@ def handle_group_description(group): return jsonify({"status": "error", "message": str(e)}), 500 +@app.route("/api/directory-history/<group>") +def get_directory_history(group): + history = config_manager.get_directory_history(group) + return jsonify(history) + + if __name__ == "__main__": app.run(debug=True) diff --git a/backend/script_groups/CSharpCodeMerger/esquema_group.json b/backend/script_groups/CSharpCodeMerger/esquema_group.json new file mode 100644 index 0000000..1c9e43a --- /dev/null +++ b/backend/script_groups/CSharpCodeMerger/esquema_group.json @@ -0,0 +1,4 @@ +{ + "type": "object", + "properties": {} +} \ No newline at end of file diff --git a/backend/script_groups/CSharpCodeMerger/esquema_work.json b/backend/script_groups/CSharpCodeMerger/esquema_work.json new file mode 100644 index 0000000..1c9e43a --- /dev/null +++ b/backend/script_groups/CSharpCodeMerger/esquema_work.json @@ -0,0 +1,4 @@ +{ + "type": "object", + "properties": {} +} \ No newline at end of file diff --git a/backend/script_groups/CSharpCodeMerger/work_dir.json b/backend/script_groups/CSharpCodeMerger/work_dir.json new file mode 100644 index 0000000..d938a5f --- /dev/null +++ b/backend/script_groups/CSharpCodeMerger/work_dir.json @@ -0,0 +1,6 @@ +{ + "path": "C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Emails", + "history": [ + "C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Emails" + ] +} \ No newline at end of file diff --git a/backend/script_groups/CSharpCodeMerger/x1.py b/backend/script_groups/CSharpCodeMerger/x1.py new file mode 100644 index 0000000..00716ac --- /dev/null +++ b/backend/script_groups/CSharpCodeMerger/x1.py @@ -0,0 +1,303 @@ +""" +Script para hacer una union de los cambios generados por un LLM en un archivo de código C#.
+""" + +import os +import sys +import json +import re +from pathlib import Path +from dataclasses import dataclass +from typing import List, Dict, Optional, Tuple +import difflib + +# Forzar UTF-8 en la salida estándar +sys.stdout.reconfigure(encoding="utf-8") + +@dataclass +class CodeSection: + type: str + name: str + content: str + start_line: int + end_line: int + parent: Optional['CodeSection'] = None + children: List['CodeSection'] = None + attributes: List[str] = None + original_indent: str = "" + + def __post_init__(self): + if self.children is None: + self.children = [] + if self.attributes is None: + self.attributes = [] + +class CSharpParser: + def __init__(self): + # Definimos el orden específico de las secciones + self.section_order = [ + 'using', + 'comment', + 'attribute', + 'namespace', + 'class', + 'interface', + 'region', + 'field', + 'property', + 'method' + ] + + self.patterns = { + 'using': r'^\s*using\s+([^;]+);', + 'namespace': r'^\s*namespace\s+([^\s{]+)', + 'class': r'^\s*(?:public|private|internal|protected)?\s*(?:partial\s+)?(?:abstract\s+)?class\s+(\w+)', + 'interface': r'^\s*(?:public|private|internal|protected)?\s*interface\s+(\w+)', + 'method': r'^\s*(?:public|private|internal|protected)?\s*(?:virtual|override|static|async)?\s*[\w<>]+\s+(\w+)\s*\(', + 'property': r'^\s*(?:\[.+\]\s*)*(?:public|private|internal|protected)?\s*[\w<>]+\s+(\w+)\s*(?:{\s*get;|=>)', + 'field': r'^\s*(?:public|private|internal|protected)?\s*(?:readonly|static|const)?\s*[\w<>]+\s+(\w+)\s*(?:=|;)', + 'attribute': r'^\s*\[([^\]]+)\]', + 'comment': r'^\s*(?://.*|/\*.*?\*/)', + 'region': r'^\s*#region\s+(.+)$' + } + self.placeholder_pattern = r'//\s*\.\.\.\s*resto del código\s*\.\.\.' + + def get_section_order_index(self, section_type: str) -> int: + try: + return self.section_order.index(section_type) + except ValueError: + return len(self.section_order) + + def get_indent(self, line: str) -> str: + match = re.match(r'^(\s*)', line) + return match.group(1) if match else "" + + def parse_file(self, content: str) -> CodeSection: + lines = content.split('\n') + root = CodeSection('root', '', '', 0, len(lines)) + current_context = [root] + current_attributes = [] + + i = 0 + while i < len(lines): + line = lines[i] + stripped = line.strip() + original_indent = self.get_indent(line) + + # Skip empty lines + if not stripped: + i += 1 + continue + + # Procesar cada tipo de sección según su orden definido + matched = False + for section_type in self.section_order: + if section_type not in self.patterns: + continue + + pattern = self.patterns[section_type] + match = re.match(pattern, line) + + if match: + name = match.group(1) + if section_type in ['namespace', 'class', 'interface', 'region']: + # Procesar secciones con bloques + section_lines, j = self._process_block(lines, i) + section = CodeSection( + section_type, + name, + '\n'.join(section_lines), + i, + j, + parent=current_context[-1], + attributes=current_attributes.copy(), + original_indent=original_indent + ) + current_context[-1].children.append(section) + + if section_type in ['namespace', 'class', 'interface']: + current_context.append(section) + + i = j + 1 + else: + # Procesar secciones simples + section = CodeSection( + section_type, + name, + line, + i, + i, + parent=current_context[-1], + attributes=current_attributes.copy(), + original_indent=original_indent + ) + current_context[-1].children.append(section) + i += 1 + + current_attributes = [] + matched = True + break + + if not matched: + i += 1 + + if stripped == '}' and 
len(current_context) > 1: + current_context.pop() + + return root + + def _process_block(self, lines: List[str], start_index: int) -> Tuple[List[str], int]: + brace_count = 0 + section_lines = [] + j = start_index + + while j < len(lines): + current_line = lines[j] + section_lines.append(current_line) + brace_count += current_line.count('{') - current_line.count('}') + + if brace_count == 0 and len(section_lines) > 1: + break + + j += 1 + + return section_lines, j + +class CSharpCodeMerger: + def __init__(self, original_code: str, llm_code: str): + self.parser = CSharpParser() + self.original_tree = self.parser.parse_file(original_code) + self.llm_tree = self.parser.parse_file(llm_code) + self.original_code = original_code + self.llm_code = llm_code + + def _sort_sections(self, sections: List[CodeSection]) -> List[CodeSection]: + return sorted(sections, key=lambda x: ( + self.parser.get_section_order_index(x.type), + x.start_line + )) + + def _merge_sections(self, original: CodeSection, llm: CodeSection) -> CodeSection: + merged = CodeSection( + original.type, + original.name, + original.content, + original.start_line, + original.end_line, + original.parent, + original_indent=original.original_indent + ) + + # Crear mapas de hijos por tipo y nombre + original_children = {(c.type, c.name): c for c in original.children} + llm_children = {(c.type, c.name): c for c in llm.children} + + merged_children = [] + + # Procesar hijos originales + for key, orig_child in original_children.items(): + if key in llm_children: + llm_child = llm_children[key] + if orig_child.type in ['namespace', 'class', 'interface', 'region']: + merged_children.append(self._merge_sections(orig_child, llm_child)) + else: + merged_children.append(llm_child if orig_child.content != llm_child.content else orig_child) + else: + merged_children.append(orig_child) + + # Añadir nuevos hijos del LLM + for key, llm_child in llm_children.items(): + if key not in original_children: + merged_children.append(llm_child) + + # Ordenar los hijos según el orden definido + merged.children = self._sort_sections(merged_children) + return merged + + def _generate_code(self, section: CodeSection, indent_level: int = 0) -> str: + lines = [] + base_indent = section.original_indent or " " * indent_level + + # Añadir atributos + for attr in section.attributes: + lines.append(base_indent + attr.lstrip()) + + if section.type != 'root': + content_lines = section.content.split('\n') + lines.append(base_indent + content_lines[0].lstrip()) + + if len(content_lines) > 1: + for line in content_lines[1:]: + if line.strip(): + current_indent = re.match(r'^(\s*)', line).group(1) + content = line.lstrip() + lines.append(base_indent + current_indent + content) + + if section.children: + sorted_children = self._sort_sections(section.children) + for child in sorted_children: + child_code = self._generate_code(child, indent_level + 1 if section.type != 'root' else 0) + if child_code: + lines.append(child_code) + + return '\n'.join(lines) + + def merge_code(self) -> str: + merged_tree = self._merge_sections(self.original_tree, self.llm_tree) + return self._generate_code(merged_tree) + + def generate_diff(self) -> str: + merged = self.merge_code() + diff = difflib.unified_diff( + self.original_code.splitlines(keepends=True), + merged.splitlines(keepends=True), + fromfile='original', + tofile='merged' + ) + return ''.join(diff) + +def main(): + configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}")) + working_directory = configs.get("working_directory", ".") + 
work_config = configs.get("level3", {}) + + input_file = work_config.get("input_file", "original.cs") + llm_file = work_config.get("llm_file", "llm_generated.cs") + output_directory = work_config.get("output_directory", ".") + + input_path = os.path.join(working_directory, input_file) + llm_path = os.path.join(working_directory, llm_file) + output_merged = os.path.join(output_directory, "merged.cs") + output_diff = os.path.join(output_directory, "changes.diff") + + for path in [input_path, llm_path]: + if not os.path.exists(path): + print(f"Error: File {path} does not exist") + return + + os.makedirs(output_directory, exist_ok=True) + + try: + with open(input_path, "r", encoding="utf-8") as f: + original_code = f.read() + with open(llm_path, "r", encoding="utf-8") as f: + llm_code = f.read() + + merger = CSharpCodeMerger(original_code, llm_code) + merged_code = merger.merge_code() + + with open(output_merged, "w", encoding="utf-8") as f: + f.write(merged_code) + with open(output_diff, "w", encoding="utf-8") as f: + f.write(merger.generate_diff()) + + print(f"Successfully processed files:") + print(f"- Merged code saved to: {output_merged}") + print(f"- Diff file saved to: {output_diff}") + + except Exception as e: + print(f"Error processing files: {str(e)}") + return + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/script_groups/EmailCrono/description.json b/backend/script_groups/EmailCrono/description.json index 1160d57..b6f7dae 100644 --- a/backend/script_groups/EmailCrono/description.json +++ b/backend/script_groups/EmailCrono/description.json @@ -2,5 +2,5 @@ "name": "Desempaquetado de Emails EML", "description": "This script processes email files (.eml) into a chronological narrative in Markdown format, optimized for processing with Large Language Models (LLMs). It extracts essential information from emails while removing unnecessary metadata, creating a clean, temporal narrative that can be easily analyzed. 
", "version": "1.0", - "author": "Unknown" + "author": "Miguel" } \ No newline at end of file diff --git a/backend/script_groups/EmailCrono/utils/__pycache__/email_parser.cpython-310.pyc b/backend/script_groups/EmailCrono/utils/__pycache__/email_parser.cpython-310.pyc index b36b5f2..5a46d01 100644 Binary files a/backend/script_groups/EmailCrono/utils/__pycache__/email_parser.cpython-310.pyc and b/backend/script_groups/EmailCrono/utils/__pycache__/email_parser.cpython-310.pyc differ diff --git a/backend/script_groups/EmailCrono/utils/email_parser.py b/backend/script_groups/EmailCrono/utils/email_parser.py index 6ba70e9..58d296b 100644 --- a/backend/script_groups/EmailCrono/utils/email_parser.py +++ b/backend/script_groups/EmailCrono/utils/email_parser.py @@ -82,35 +82,101 @@ def _html_a_markdown(html): if not rows: continue - markdown_table = [] - max_widths = [] + # Matriz para almacenar la tabla procesada + table_matrix = [] + max_cols = 0 - # Calcular anchos máximos - for row in rows: + # Primera pasada: crear matriz y procesar rowspans/colspans + row_idx = 0 + while row_idx < len(rows): + row = rows[row_idx] cells = row.find_all(['th', 'td']) - while len(max_widths) < len(cells): - max_widths.append(0) - for i, cell in enumerate(cells): + if not cells: + row_idx += 1 + continue + + # Expandir matriz si es necesario + while len(table_matrix) <= row_idx: + table_matrix.append([]) + + col_idx = 0 + for cell in cells: + # Encontrar la siguiente columna disponible + while col_idx < len(table_matrix[row_idx]) and table_matrix[row_idx][col_idx] is not None: + col_idx += 1 + + # Obtener rowspan y colspan + rowspan = int(cell.get('rowspan', 1)) + colspan = int(cell.get('colspan', 1)) + + # Procesar el texto de la celda reemplazando saltos de línea por
<br> cell_text = cell.get_text().strip() - max_widths[i] = max(max_widths[i], len(cell_text)) + cell_text = cell_text.replace('\n', '<br>') + cell_text = re.sub(r'\s*<br>\s*<br>\s*', '<br>', cell_text) # Eliminar <br>
múltiples + cell_text = cell_text.strip() + + # Rellenar la matriz con el texto y None para las celdas combinadas + for r in range(rowspan): + current_row = row_idx + r + # Expandir matriz si es necesario + while len(table_matrix) <= current_row: + table_matrix.append([]) + # Expandir fila si es necesario + while len(table_matrix[current_row]) <= col_idx + colspan - 1: + table_matrix[current_row].append(None) + + for c in range(colspan): + if r == 0 and c == 0: + table_matrix[current_row][col_idx + c] = cell_text + else: + table_matrix[current_row][col_idx + c] = '' + + col_idx += colspan + + max_cols = max(max_cols, col_idx) + row_idx += 1 - # Construir tabla markdown - if max_widths: # Solo si tenemos celdas válidas - header_row = rows[0].find_all(['th', 'td']) - header = '| ' + ' | '.join(cell.get_text().strip().ljust(max_widths[i]) - for i, cell in enumerate(header_row)) + ' |' - separator = '|' + '|'.join('-' * (width + 2) for width in max_widths) + '|' + # Asegurar que todas las filas tengan el mismo número de columnas + for row in table_matrix: + while len(row) < max_cols: + row.append('') + # Calcular anchos máximos por columna + col_widths = [0] * max_cols + for row in table_matrix: + for col_idx, cell in enumerate(row): + if cell is not None: + col_widths[col_idx] = max(col_widths[col_idx], len(str(cell))) + + # Generar tabla Markdown + markdown_table = [] + + # Cabecera + if table_matrix: + header = '|' + for col_idx, width in enumerate(col_widths): + cell = str(table_matrix[0][col_idx] or '') + header += f' {cell.ljust(width)} |' markdown_table.append(header) + + # Separador + separator = '|' + for width in col_widths: + separator += '-' * (width + 2) + '|' markdown_table.append(separator) - for row in rows[1:]: - cells = row.find_all(['td', 'th']) - row_text = '| ' + ' | '.join(cell.get_text().strip().ljust(max_widths[i]) - for i, cell in enumerate(cells)) + ' |' + # Contenido + for row_idx in range(1, len(table_matrix)): + row_text = '|' + for col_idx, width in enumerate(col_widths): + cell = str(table_matrix[row_idx][col_idx] or '') + row_text += f' {cell.ljust(width)} |' markdown_table.append(row_text) - - table.replace_with(soup.new_string('\n' + '\n'.join(markdown_table))) + + # Reemplazar la tabla HTML con la versión Markdown + if markdown_table: + table.replace_with(soup.new_string('\n' + '\n'.join(markdown_table) + '\n')) + except Exception as e: print(f"Error procesando tabla: {str(e)}") continue diff --git a/backend/script_groups/EmailCrono/work_dir.json b/backend/script_groups/EmailCrono/work_dir.json index 544f154..e6cb65a 100644 --- a/backend/script_groups/EmailCrono/work_dir.json +++ b/backend/script_groups/EmailCrono/work_dir.json @@ -1,3 +1,11 @@ { - "path": "C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails" + "path": "C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\EmailTody", + "history": [ + "C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\EmailTody", + "C:\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Reporte\\Emails", + "C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Emails", + "C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\Emails\\Trial", + "C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\Emails", + "C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\Emails\\Error de tablas" + ] } \ No newline at end of file diff --git a/backend/script_groups/example_group/work_dir.json 
b/backend/script_groups/example_group/work_dir.json index 92dc8d3..41d1283 100644 --- a/backend/script_groups/example_group/work_dir.json +++ b/backend/script_groups/example_group/work_dir.json @@ -1,3 +1,6 @@ { - "path": "C:/Estudio" + "path": "C:\\Estudio", + "history": [ + "C:\\Estudio" + ] } \ No newline at end of file diff --git a/backend/script_groups/ragex/data.json b/backend/script_groups/ragex/data.json new file mode 100644 index 0000000..0e0dcd2 --- /dev/null +++ b/backend/script_groups/ragex/data.json @@ -0,0 +1,3 @@ +{ + +} \ No newline at end of file diff --git a/backend/script_groups/ragex/description.json b/backend/script_groups/ragex/description.json new file mode 100644 index 0000000..6ab0524 --- /dev/null +++ b/backend/script_groups/ragex/description.json @@ -0,0 +1,6 @@ +{ + "name": "RAGEX", + "description": "This script processes text files into a chronological narrative in Markdown format, optimized for processing with Large Language Models (LLMs). It extracts essential information from text files while removing unnecessary metadata, creating a clean, temporal narrative that can be easily analyzed.", + "version": "1.0", + "author": "Miguel" +} \ No newline at end of file diff --git a/backend/script_groups/ragex/esquema_group.json b/backend/script_groups/ragex/esquema_group.json new file mode 100644 index 0000000..0e0dcd2 --- /dev/null +++ b/backend/script_groups/ragex/esquema_group.json @@ -0,0 +1,3 @@ +{ + +} \ No newline at end of file diff --git a/backend/script_groups/ragex/esquema_work.json b/backend/script_groups/ragex/esquema_work.json new file mode 100644 index 0000000..9c49659 --- /dev/null +++ b/backend/script_groups/ragex/esquema_work.json @@ -0,0 +1,21 @@ +{ + "type": "object", + "properties": { + "in_dir": { + "type": "string", + "format": "directory", + "title": "Subdirectorio desde donde hacer la ingesta de los datos", + "description": "Subdirectorio desde donde hacer la ingesta de los datos" + }, + "model": { + "type": "string", + "title": "Model", + "description": "OpenAI Model" + }, + "query": { + "type": "string", + "title": "Consulta", + "description": "Consulta" + } + } +} \ No newline at end of file diff --git a/backend/script_groups/ragex/openai_api_key.py b/backend/script_groups/ragex/openai_api_key.py new file mode 100644 index 0000000..1826e5d --- /dev/null +++ b/backend/script_groups/ragex/openai_api_key.py @@ -0,0 +1,3 @@ +# Configura tu clave API de OpenAI +def openai_api_key(): + return 'sk-HIY5DSK03Lr' \ No newline at end of file diff --git a/backend/script_groups/ragex/work_dir.json b/backend/script_groups/ragex/work_dir.json new file mode 100644 index 0000000..21a63c1 --- /dev/null +++ b/backend/script_groups/ragex/work_dir.json @@ -0,0 +1,6 @@ +{ + "path": "D:\\Proyectos\\Scripts\\RAG\\TEST", + "history": [ + "D:\\Proyectos\\Scripts\\RAG\\TEST" + ] +} \ No newline at end of file diff --git a/backend/script_groups/ragex/x1.py b/backend/script_groups/ragex/x1.py new file mode 100644 index 0000000..114acca --- /dev/null +++ b/backend/script_groups/ragex/x1.py @@ -0,0 +1,112 @@ +""" +Este script realiza la ingesta de los datos alammacenados en el subdirectorio de ingesta. 
+""" + +import os +import sys +from pathlib import Path +import json +from langchain_text_splitters import RecursiveCharacterTextSplitter +from langchain_community.vectorstores import Chroma +from langchain_openai import OpenAIEmbeddings # Cambiado a OpenAI Embeddings +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_core.documents import Document +import os +import glob + + +def load_documents(directory): + documents = [] + + # Cargar archivos markdown + for md_file in glob.glob(os.path.join(directory, "**/*.md"), recursive=True): + with open(md_file, "r", encoding="utf-8") as f: + content = f.read() + documents.append( + { + "content": content, + "metadata": {"source": md_file, "type": "markdown"}, + } + ) + + # Cargar archivos de texto + for txt_file in glob.glob(os.path.join(directory, "**/*.txt"), recursive=True): + with open(txt_file, "r", encoding="utf-8") as f: + content = f.read() + documents.append( + {"content": content, "metadata": {"source": txt_file, "type": "text"}} + ) + + return documents + + +def process_documents(documents, db_directory): + # Usar OpenAI Embeddings en lugar de HuggingFace + embeddings = OpenAIEmbeddings(model="text-embedding-3-small") + + # Dividir documentos en chunks + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, + chunk_overlap=200, + separators=["\n## ", "\n### ", "\n#### ", "\n", " ", ""], + keep_separator=True, + ) + + docs = [] + for doc in documents: + chunks = text_splitter.split_text(doc["content"]) + for i, chunk in enumerate(chunks): + docs.append( + Document( + page_content=chunk, + metadata={ + **doc["metadata"], + "chunk_id": i, + "chunk": chunk[:100] + "...", # Extracto para referencia + }, + ) + ) + + # Configurar Chroma para evitar dependencia de ONNX + from chromadb.config import Settings + + # Crear o actualizar la base de datos vectorial con configuración específica + db = Chroma.from_documents( + docs, + embeddings, + persist_directory=db_directory, + client_settings=Settings(anonymized_telemetry=False, is_persistent=True), + ) + db.persist() + + print(f"Procesados {len(docs)} fragmentos de {len(documents)} documentos") + return db + + +def main(): + # Cargar configuraciones del entorno + configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}")) + + # Obtener working directory + working_directory = configs.get("working_directory", ".") + + # Obtener configuraciones de nivel 2 (grupo) + group_config = configs.get("level2", {}) + + work_config = configs.get("level3", {}) + in_dir = work_config.get("in_dir", ".") + + docs_directory = os.path.join(working_directory, in_dir) + db_directory = os.path.join(working_directory, "chroma_db") + + print("Cargando documentos...") + documents = load_documents(docs_directory) + print(f"Se encontraron {len(documents)} documentos.") + + print("Procesando e indexando documentos...") + db = process_documents(documents, db_directory) + print("¡Ingesta completada con éxito!") + + +if __name__ == "__main__": + main() diff --git a/backend/script_groups/ragex/x2.py b/backend/script_groups/ragex/x2.py new file mode 100644 index 0000000..ee7205e --- /dev/null +++ b/backend/script_groups/ragex/x2.py @@ -0,0 +1,126 @@ +""" +Este script realiza la consulta usando RAGEX a la base de datos de documentos. 
+""" + +import os +import sys +from pathlib import Path +import json +from langchain_community.vectorstores import Chroma +from langchain_openai import ( + OpenAIEmbeddings, +) # Cambiado de HuggingFaceEmbeddings a OpenAIEmbeddings +from langchain_openai import ChatOpenAI +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.runnables import RunnablePassthrough +from rich.console import Console +from rich.markdown import Markdown +import os +import argparse +from openai_api_key import openai_api_key + + +console = Console() + + +class CitationTracker: + def __init__(self): + self.citations = [] + + def add_citation(self, text, metadata): + self.citations.append({"text": text, "metadata": metadata}) + + def get_formatted_citations(self): + result = "\n## Fuentes\n\n" + for i, citation in enumerate(self.citations, 1): + source = citation["metadata"]["source"] + result += f"{i}. [{os.path.basename(source)}]({source}) - Fragmento {citation['metadata']['chunk_id']}\n" + return result + + +def search_with_citation(query, db_directory, model="gpt-3.5-turbo"): + # Cargar embeddings y base de datos + embeddings = OpenAIEmbeddings( + model="text-embedding-3-small" + ) # Usar OpenAI Embeddings igual que en x1.py + + db = Chroma(persist_directory=db_directory, embedding_function=embeddings) + + api_key = openai_api_key() + os.environ["OPENAI_API_KEY"] = api_key + + # Configurar el LLM de OpenAI + llm = ChatOpenAI(model_name=model) + + # Rastreador de citas + citation_tracker = CitationTracker() + + # Recuperar documentos relevantes + retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5}) + + # Plantilla para el prompt + template = """ + Responde a la siguiente pregunta basándote exclusivamente en la información proporcionada. + Incluye referencias a las fuentes originales para cada afirmación importante usando [Fuente N]. + Si la información no es suficiente, indícalo claramente. 
+ + Contexto: + {context} + + Pregunta: {question} + + Respuesta (incluye [Fuente N] para citar): + """ + + prompt = ChatPromptTemplate.from_template(template) + + # Función para formatear el contexto + def format_docs(docs): + formatted_context = "" + for i, doc in enumerate(docs, 1): + citation_tracker.add_citation(doc.page_content, doc.metadata) + formatted_context += f"[Fuente {i}]: {doc.page_content}\n\n" + return formatted_context + + # Cadena RAG + rag_chain = ( + {"context": retriever | format_docs, "question": RunnablePassthrough()} + | prompt + | llm + | StrOutputParser() + ) + + # Ejecutar búsqueda + response = rag_chain.invoke(query) + + # Agregar citas al final + full_response = response + "\n\n" + citation_tracker.get_formatted_citations() + + return full_response + + +def main(): + # Cargar configuraciones del entorno + configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}")) + + # Obtener working directory + working_directory = configs.get("working_directory", ".") + + # Obtener configuraciones de nivel 2 (grupo) + group_config = configs.get("level2", {}) + + work_config = configs.get("level3", {}) + in_dir = work_config.get("in_dir", ".") + + docs_directory = os.path.join(working_directory, in_dir) + model = work_config.get("model", "gpt-3.5-turbo") + query = work_config.get("query", "") + db_directory = os.path.join(working_directory, "chroma_db") + + result = search_with_citation(query, db_directory, model) + console.print(Markdown(result)) + + +if __name__ == "__main__": + main() diff --git a/config_manager.py b/config_manager.py index f776713..aedcc56 100644 --- a/config_manager.py +++ b/config_manager.py @@ -454,6 +454,9 @@ class ConfigurationManager: with open(work_dir_path, "r") as f: data = json.load(f) path = data.get("path", "") + # Normalizar separadores de ruta + if path: + path = os.path.normpath(path) # Actualizar la variable de instancia si hay una ruta válida if path and os.path.exists(path): self.working_directory = path @@ -462,16 +465,45 @@ class ConfigurationManager: return "" def set_work_dir(self, group: str, path: str) -> Dict[str, str]: - """Set working directory path for a script group.""" + """Set working directory path for a script group and update history.""" + # Normalizar el path recibido + path = os.path.normpath(path) + if not os.path.exists(path): return {"status": "error", "message": "Directory does not exist"} work_dir_path = os.path.join(self.script_groups_path, group, "work_dir.json") try: - # Guardar la ruta en work_dir.json + # Cargar datos existentes o crear nuevos + try: + with open(work_dir_path, "r") as f: + data = json.load(f) + # Normalizar paths existentes en el historial + if "history" in data: + data["history"] = [os.path.normpath(p) for p in data["history"]] + except (FileNotFoundError, json.JSONDecodeError): + data = {"path": "", "history": []} + + # Actualizar path actual + data["path"] = path + + # Actualizar historial + if "history" not in data: + data["history"] = [] + + # Eliminar la ruta del historial si ya existe (usando path normalizado) + data["history"] = [p for p in data["history"] if os.path.normpath(p) != path] + + # Agregar la ruta al principio del historial + data["history"].insert(0, path) + + # Mantener solo los últimos 10 directorios + data["history"] = data["history"][:10] + + # Guardar datos actualizados with open(work_dir_path, "w") as f: - json.dump({"path": path}, f, indent=2) + json.dump(data, f, indent=2) # Actualizar la variable de instancia self.working_directory = path @@ -485,3 +517,16 @@ 
class ConfigurationManager: return {"status": "success", "path": path} except Exception as e: return {"status": "error", "message": str(e)} + + def get_directory_history(self, group: str) -> List[str]: + """Get the directory history for a script group.""" + work_dir_path = os.path.join(self.script_groups_path, group, "work_dir.json") + try: + with open(work_dir_path, "r") as f: + data = json.load(f) + # Normalizar todos los paths en el historial + history = [os.path.normpath(p) for p in data.get("history", [])] + # Filtrar solo directorios que existen + return [p for p in history if os.path.exists(p)] + except (FileNotFoundError, json.JSONDecodeError): + return [] diff --git a/data/log.txt b/data/log.txt index 37357af..45ca4a5 100644 --- a/data/log.txt +++ b/data/log.txt @@ -1,116 +1,8 @@ -[09:40:16] Iniciando ejecución de x1.py -[09:40:18] Working directory: C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails -[09:40:18] Input directory: C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails -[09:40:18] Output directory: C:/Users/migue/OneDrive/Miguel/Obsidean/Trabajo/VM/04-InLavoro/HENKEL/93040 - HENKEL - BowlingGreen/Description/HENKEL - ALPLA - AUTEFA - Batch Data -[09:40:18] Cronologia file: C:/Users/migue/OneDrive/Miguel/Obsidean/Trabajo/VM/04-InLavoro/HENKEL/93040 - HENKEL - BowlingGreen/Description/HENKEL - ALPLA - AUTEFA - Batch Data\cronologia.md -[09:40:18] Attachments directory: C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails\adjuntos -[09:40:18] Beautify rules file: D:\Proyectos\Scripts\ParamManagerScripts\backend\script_groups\EmailCrono\config\beautify_rules.json -[09:40:18] Found 13 .eml files -[09:40:18] Loaded 0 existing messages -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ 9.3040-3074 ALPLA BG open points.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ 9.3061-TLO26-L42 automatic change over test.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ Alpla BOW2 - Line emptying button 6168.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ IDH_BTL.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ OPC-UA interface Vetro - Bowling Green 2 9.3040-3074.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ R_ I_ [EXT] R_ Vetro Conveyor 9.3674.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 
4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\NEXT2 - Data - ALPLA information verification.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RE_ Automatic changeover trial.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RE_ OPC-UA interface Vetro - Bowling Green 2 9.3040-3074.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RE_ [EXT] RE_ Vetro_ALPLA information verification.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\Re_ _EXT_ Next + 1 - HENKEL - ALPLA - AUTEFA.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RV_ RE_ [EXT] RE_ Vetro_ALPLA information verification.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 
Problem\Reporte\Emails\R_ [EXT] RE_ Vetro_ALPLA information verification 9.3060-3067.eml -[09:40:18] Aplicando reglas de prioridad 1 -[09:40:18] Aplicando reglas de prioridad 2 -[09:40:18] Aplicando reglas de prioridad 3 -[09:40:18] Aplicando reglas de prioridad 4 -[09:40:18] Estadísticas de procesamiento: -[09:40:18] - Total mensajes encontrados: 38 -[09:40:18] - Mensajes únicos añadidos: 22 -[09:40:18] - Mensajes duplicados ignorados: 16 -[09:40:18] Writing 22 messages to C:/Users/migue/OneDrive/Miguel/Obsidean/Trabajo/VM/04-InLavoro/HENKEL/93040 - HENKEL - BowlingGreen/Description/HENKEL - ALPLA - AUTEFA - Batch Data\cronologia.md -[09:40:18] Ejecución completada +[20:41:53] Iniciando ejecución de x1.py +[20:41:57] Cargando documentos... +[20:41:57] Se encontraron 1 documentos. +[20:41:57] Procesando e indexando documentos... +[20:44:57] Iniciando ejecución de x1.py +[20:45:01] Cargando documentos... +[20:45:01] Se encontraron 1 documentos. +[20:45:01] Procesando e indexando documentos... diff --git a/services/llm/openai_api_key.py b/services/llm/openai_api_key.py new file mode 100644 index 0000000..39f5b3a --- /dev/null +++ b/services/llm/openai_api_key.py @@ -0,0 +1,3 @@ +# Configura tu clave API de OpenAI +def openai_api_key(): + return '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' \ No newline at end of file diff --git a/services/llm/openai_service.py b/services/llm/openai_service.py index 7363bbc..9146287 100644 --- a/services/llm/openai_service.py +++ b/services/llm/openai_service.py @@ -6,11 +6,11 @@ from openai import OpenAI from typing import Dict, List import json from .base import LLMService -from config.api_keys import APIKeyManager +from openai_api_key import openai_api_key class OpenAIService(LLMService): def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3): - api_key = APIKeyManager.get_openai_key() + api_key = openai_api_key() if not api_key: raise ValueError("OpenAI API key not found. 
Please set up your API keys.") diff --git a/static/js/scripts.js b/static/js/scripts.js index 3520199..35c9499 100644 --- a/static/js/scripts.js +++ b/static/js/scripts.js @@ -535,6 +535,7 @@ async function setWorkingDirectory() { await updateWorkingDirectory(path); } +// Modificar initWorkingDirectory para cargar también el historial async function initWorkingDirectory() { if (!currentGroup) return; @@ -543,6 +544,7 @@ async function initWorkingDirectory() { if (result.status === 'success' && result.path) { await updateWorkingDirectory(result.path); } + await loadDirectoryHistory(); } async function browseDirectory() { @@ -565,28 +567,64 @@ async function browseDirectory() { async function updateWorkingDirectory(path) { console.log('Updating working directory:', { path, group: currentGroup }); // Debug line - const response = await fetch('/api/working-directory', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - path: path, - group: currentGroup - }) - }); - - const result = await response.json(); - console.log('Update result:', result); // Debug line - - if (result.status === 'success') { - // Actualizar input - document.getElementById('working-directory').value = path; + try { + const response = await fetch('/api/working-directory', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + path: path, + group: currentGroup + }) + }); - // Recargar configuración de nivel 3 - const configResponse = await fetch(`/api/config/3?group=${currentGroup}`); - const data = await configResponse.json(); - await renderForm('level3-form', data); - } else { - alert('Error: ' + (result.message || 'No se pudo actualizar el directorio de trabajo')); + const result = await response.json(); + console.log('Update result:', result); // Debug line + + if (result.status === 'success') { + // Actualizar input y lista de directorios + document.getElementById('working-directory').value = path; + await loadDirectoryHistory(); + + // Recargar configuración de nivel 3 + const configResponse = await fetch(`/api/config/3?group=${currentGroup}`); + const data = await configResponse.json(); + await renderForm('level3-form', data); + } else { + alert('Error: ' + (result.message || 'No se pudo actualizar el directorio de trabajo')); + } + } catch (error) { + console.error('Error updating working directory:', error); + alert('Error actualizando el directorio de trabajo: ' + error.message); + } +} + +async function loadDirectoryHistory() { + try { + const response = await fetch(`/api/directory-history/${currentGroup}`); + const history = await response.json(); + + const select = document.getElementById('directory-history'); + select.innerHTML = ''; + + history.forEach(dir => { + const option = document.createElement('option'); + option.value = dir; + option.textContent = dir; + // Marcar como seleccionado si es el directorio actual + if (dir === document.getElementById('working-directory').value) { + option.selected = true; + } + select.appendChild(option); + }); + } catch (error) { + console.error('Error loading directory history:', error); + } +} + +function loadHistoryDirectory(path) { + if (path) { + document.getElementById('working-directory').value = path; + updateWorkingDirectory(path); // Cambiado de setWorkingDirectory a updateWorkingDirectory } } @@ -657,6 +695,16 @@ async function initializeApp() { await initWorkingDirectory(); await loadConfigs(); + // Mostrar level3-content por defecto + const level3Content = 
document.getElementById('level3-content'); + if (level3Content) { + level3Content.classList.remove('hidden'); + const button = document.querySelector(`[onclick="toggleConfig('level3-content')"]`); + if (button) { + button.innerText = 'Ocultar Configuración'; + } + } + } catch (error) { console.error('Error during initialization:', error); } diff --git a/templates/index.html b/templates/index.html index f9e4a8d..b0af258 100644 --- a/templates/index.html +++ b/templates/index.html @@ -109,6 +109,12 @@ Confirmar + +
+ [HTML markup stripped during extraction: the added lines insert the "directory-history" <select> dropdown that scripts.js populates in loadDirectoryHistory() and reads in loadHistoryDirectory()]
@@ -117,10 +123,10 @@
 [HTML markup stripped/truncated: the remaining changes to the "Configuración del Directorio" block are not recoverable]
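
For reference, the short sketch below exercises the directory-history round trip introduced in this diff. It is a minimal illustration, not part of the change set: it assumes the Flask dev server from app.py is running on the default http://127.0.0.1:5000, that the existing /api/working-directory POST route (the one scripts.js already calls) is available, and that the example path exists on disk; the group name and path are placeholders taken from example_group.

# Minimal sketch of the new directory-history flow (assumptions: Flask dev
# server from app.py on http://127.0.0.1:5000, existing /api/working-directory
# POST route, and an on-disk path; group/path below are illustrative only).
import json
import urllib.request

BASE = "http://127.0.0.1:5000"
GROUP = "example_group"  # placeholder group name from the diff

# 1) Set the working directory. config_manager.set_work_dir() normalizes the
#    path, moves it to the front of "history" and keeps at most 10 entries.
body = json.dumps({"group": GROUP, "path": r"C:\Estudio"}).encode("utf-8")
req = urllib.request.Request(
    f"{BASE}/api/working-directory",
    data=body,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # e.g. {"status": "success", "path": "C:\\Estudio"}

# 2) Read the history back through the new endpoint; get_directory_history()
#    returns only paths that still exist on disk, most recent first.
with urllib.request.urlopen(f"{BASE}/api/directory-history/{GROUP}") as resp:
    print(json.load(resp))  # e.g. ["C:\\Estudio"]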