Attempt to create a RagEx

Miguel 2025-04-04 14:29:40 +02:00
parent 9d090d2db7
commit 6070938bcc
25 changed files with 841 additions and 167 deletions

6
app.py
View File

@@ -221,5 +221,11 @@ def handle_group_description(group):
return jsonify({"status": "error", "message": str(e)}), 500
@app.route("/api/directory-history/<group>")
def get_directory_history(group):
history = config_manager.get_directory_history(group)
return jsonify(history)
if __name__ == "__main__":
app.run(debug=True)
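The new /api/directory-history/<group> route simply serializes whatever get_directory_history returns. A minimal sketch of calling it from Python, assuming the Flask dev server started by app.run(debug=True) is listening on its default http://127.0.0.1:5000 and that "example" stands in for a real group name (both are assumptions, not shown in this commit):

import requests

# "example" is a hypothetical group name; the endpoint returns a JSON list of paths.
resp = requests.get("http://127.0.0.1:5000/api/directory-history/example")
resp.raise_for_status()
for path in resp.json():
    print(path)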

View File

@@ -0,0 +1,4 @@
{
"type": "object",
"properties": {}
}

View File

@@ -0,0 +1,4 @@
{
"type": "object",
"properties": {}
}

View File

@@ -0,0 +1,6 @@
{
"path": "C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Emails",
"history": [
"C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Emails"
]
}

View File

@@ -0,0 +1,303 @@
"""
Script to merge the changes generated by an LLM into a C# source code file.
"""
import os
import sys
import json
import re
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple
import difflib
# Force UTF-8 on standard output
sys.stdout.reconfigure(encoding="utf-8")
@dataclass
class CodeSection:
type: str
name: str
content: str
start_line: int
end_line: int
parent: Optional['CodeSection'] = None
children: List['CodeSection'] = None
attributes: List[str] = None
original_indent: str = ""
def __post_init__(self):
if self.children is None:
self.children = []
if self.attributes is None:
self.attributes = []
class CSharpParser:
def __init__(self):
# Define the specific ordering of sections
self.section_order = [
'using',
'comment',
'attribute',
'namespace',
'class',
'interface',
'region',
'field',
'property',
'method'
]
self.patterns = {
'using': r'^\s*using\s+([^;]+);',
'namespace': r'^\s*namespace\s+([^\s{]+)',
'class': r'^\s*(?:public|private|internal|protected)?\s*(?:partial\s+)?(?:abstract\s+)?class\s+(\w+)',
'interface': r'^\s*(?:public|private|internal|protected)?\s*interface\s+(\w+)',
'method': r'^\s*(?:public|private|internal|protected)?\s*(?:virtual|override|static|async)?\s*[\w<>]+\s+(\w+)\s*\(',
'property': r'^\s*(?:\[.+\]\s*)*(?:public|private|internal|protected)?\s*[\w<>]+\s+(\w+)\s*(?:{\s*get;|=>)',
'field': r'^\s*(?:public|private|internal|protected)?\s*(?:readonly|static|const)?\s*[\w<>]+\s+(\w+)\s*(?:=|;)',
'attribute': r'^\s*\[([^\]]+)\]',
'comment': r'^\s*(?://.*|/\*.*?\*/)',
'region': r'^\s*#region\s+(.+)$'
}
self.placeholder_pattern = r'//\s*\.\.\.\s*resto del código\s*\.\.\.'
def get_section_order_index(self, section_type: str) -> int:
try:
return self.section_order.index(section_type)
except ValueError:
return len(self.section_order)
def get_indent(self, line: str) -> str:
match = re.match(r'^(\s*)', line)
return match.group(1) if match else ""
def parse_file(self, content: str) -> CodeSection:
lines = content.split('\n')
root = CodeSection('root', '', '', 0, len(lines))
current_context = [root]
current_attributes = []
i = 0
while i < len(lines):
line = lines[i]
stripped = line.strip()
original_indent = self.get_indent(line)
# Skip empty lines
if not stripped:
i += 1
continue
# Process each section type according to its defined order
matched = False
for section_type in self.section_order:
if section_type not in self.patterns:
continue
pattern = self.patterns[section_type]
match = re.match(pattern, line)
if match:
name = match.group(1)
if section_type in ['namespace', 'class', 'interface', 'region']:
# Process block sections
section_lines, j = self._process_block(lines, i)
section = CodeSection(
section_type,
name,
'\n'.join(section_lines),
i,
j,
parent=current_context[-1],
attributes=current_attributes.copy(),
original_indent=original_indent
)
current_context[-1].children.append(section)
if section_type in ['namespace', 'class', 'interface']:
current_context.append(section)
i = j + 1
else:
# Process simple, single-line sections
section = CodeSection(
section_type,
name,
line,
i,
i,
parent=current_context[-1],
attributes=current_attributes.copy(),
original_indent=original_indent
)
current_context[-1].children.append(section)
i += 1
current_attributes = []
matched = True
break
if not matched:
i += 1
if stripped == '}' and len(current_context) > 1:
current_context.pop()
return root
def _process_block(self, lines: List[str], start_index: int) -> Tuple[List[str], int]:
brace_count = 0
section_lines = []
j = start_index
while j < len(lines):
current_line = lines[j]
section_lines.append(current_line)
brace_count += current_line.count('{') - current_line.count('}')
if brace_count == 0 and len(section_lines) > 1:
break
j += 1
return section_lines, j
class CSharpCodeMerger:
def __init__(self, original_code: str, llm_code: str):
self.parser = CSharpParser()
self.original_tree = self.parser.parse_file(original_code)
self.llm_tree = self.parser.parse_file(llm_code)
self.original_code = original_code
self.llm_code = llm_code
def _sort_sections(self, sections: List[CodeSection]) -> List[CodeSection]:
return sorted(sections, key=lambda x: (
self.parser.get_section_order_index(x.type),
x.start_line
))
def _merge_sections(self, original: CodeSection, llm: CodeSection) -> CodeSection:
merged = CodeSection(
original.type,
original.name,
original.content,
original.start_line,
original.end_line,
original.parent,
original_indent=original.original_indent
)
# Build child maps keyed by (type, name)
original_children = {(c.type, c.name): c for c in original.children}
llm_children = {(c.type, c.name): c for c in llm.children}
merged_children = []
# Process the original children
for key, orig_child in original_children.items():
if key in llm_children:
llm_child = llm_children[key]
if orig_child.type in ['namespace', 'class', 'interface', 'region']:
merged_children.append(self._merge_sections(orig_child, llm_child))
else:
merged_children.append(llm_child if orig_child.content != llm_child.content else orig_child)
else:
merged_children.append(orig_child)
# Add new children coming from the LLM
for key, llm_child in llm_children.items():
if key not in original_children:
merged_children.append(llm_child)
# Sort the children according to the defined order
merged.children = self._sort_sections(merged_children)
return merged
def _generate_code(self, section: CodeSection, indent_level: int = 0) -> str:
lines = []
base_indent = section.original_indent or " " * indent_level
# Add attributes
for attr in section.attributes:
lines.append(base_indent + attr.lstrip())
if section.type != 'root':
content_lines = section.content.split('\n')
lines.append(base_indent + content_lines[0].lstrip())
if len(content_lines) > 1:
for line in content_lines[1:]:
if line.strip():
current_indent = re.match(r'^(\s*)', line).group(1)
content = line.lstrip()
lines.append(base_indent + current_indent + content)
if section.children:
sorted_children = self._sort_sections(section.children)
for child in sorted_children:
child_code = self._generate_code(child, indent_level + 1 if section.type != 'root' else 0)
if child_code:
lines.append(child_code)
return '\n'.join(lines)
def merge_code(self) -> str:
merged_tree = self._merge_sections(self.original_tree, self.llm_tree)
return self._generate_code(merged_tree)
def generate_diff(self) -> str:
merged = self.merge_code()
diff = difflib.unified_diff(
self.original_code.splitlines(keepends=True),
merged.splitlines(keepends=True),
fromfile='original',
tofile='merged'
)
return ''.join(diff)
def main():
configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}"))
working_directory = configs.get("working_directory", ".")
work_config = configs.get("level3", {})
input_file = work_config.get("input_file", "original.cs")
llm_file = work_config.get("llm_file", "llm_generated.cs")
output_directory = work_config.get("output_directory", ".")
input_path = os.path.join(working_directory, input_file)
llm_path = os.path.join(working_directory, llm_file)
output_merged = os.path.join(output_directory, "merged.cs")
output_diff = os.path.join(output_directory, "changes.diff")
for path in [input_path, llm_path]:
if not os.path.exists(path):
print(f"Error: File {path} does not exist")
return
os.makedirs(output_directory, exist_ok=True)
try:
with open(input_path, "r", encoding="utf-8") as f:
original_code = f.read()
with open(llm_path, "r", encoding="utf-8") as f:
llm_code = f.read()
merger = CSharpCodeMerger(original_code, llm_code)
merged_code = merger.merge_code()
with open(output_merged, "w", encoding="utf-8") as f:
f.write(merged_code)
with open(output_diff, "w", encoding="utf-8") as f:
f.write(merger.generate_diff())
print("Successfully processed files:")
print(f"- Merged code saved to: {output_merged}")
print(f"- Diff file saved to: {output_diff}")
except Exception as e:
print(f"Error processing files: {str(e)}")
return
if __name__ == "__main__":
main()
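A quick way to exercise the merger without the SCRIPT_CONFIGS plumbing is to feed CSharpCodeMerger two strings directly. A minimal smoke test, assuming the script is importable as a module named llm_merge (the file name is not visible in this view, so the module name is hypothetical):

# Smoke test on top-level using directives; "llm_merge" is a hypothetical module name.
from llm_merge import CSharpCodeMerger

original = "using System;\n"
llm_code = "using System;\nusing System.Text;\n"

merger = CSharpCodeMerger(original, llm_code)
print(merger.merge_code())     # union of the sections, in the defined section order
print(merger.generate_diff())  # unified diff from 'original' to 'merged'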

View File

@@ -2,5 +2,5 @@
"name": "Desempaquetado de Emails EML",
"description": "This script processes email files (.eml) into a chronological narrative in Markdown format, optimized for processing with Large Language Models (LLMs). It extracts essential information from emails while removing unnecessary metadata, creating a clean, temporal narrative that can be easily analyzed. ",
"version": "1.0",
"author": "Unknown"
"author": "Miguel"
}

View File

@@ -82,35 +82,101 @@ def _html_a_markdown(html):
if not rows:
continue
markdown_table = []
max_widths = []
# Matrix to store the processed table
table_matrix = []
max_cols = 0
# Compute the maximum widths
for row in rows:
# First pass: build the matrix and resolve rowspans/colspans
row_idx = 0
while row_idx < len(rows):
row = rows[row_idx]
cells = row.find_all(['th', 'td'])
while len(max_widths) < len(cells):
max_widths.append(0)
for i, cell in enumerate(cells):
if not cells:
row_idx += 1
continue
# Grow the matrix as needed
while len(table_matrix) <= row_idx:
table_matrix.append([])
col_idx = 0
for cell in cells:
# Find the next available column
while col_idx < len(table_matrix[row_idx]) and table_matrix[row_idx][col_idx] is not None:
col_idx += 1
# Read rowspan and colspan
rowspan = int(cell.get('rowspan', 1))
colspan = int(cell.get('colspan', 1))
# Process the cell text, replacing line breaks with <br>
cell_text = cell.get_text().strip()
max_widths[i] = max(max_widths[i], len(cell_text))
cell_text = cell_text.replace('\n', '<br>')
cell_text = re.sub(r'\s*<br>\s*<br>\s*', '<br>', cell_text) # Collapse repeated <br>
cell_text = cell_text.strip()
# Fill the matrix with the text, using empty strings for the merged cells
for r in range(rowspan):
current_row = row_idx + r
# Grow the matrix as needed
while len(table_matrix) <= current_row:
table_matrix.append([])
# Grow the row as needed
while len(table_matrix[current_row]) <= col_idx + colspan - 1:
table_matrix[current_row].append(None)
for c in range(colspan):
if r == 0 and c == 0:
table_matrix[current_row][col_idx + c] = cell_text
else:
table_matrix[current_row][col_idx + c] = ''
col_idx += colspan
max_cols = max(max_cols, col_idx)
row_idx += 1
# Build the Markdown table
if max_widths: # Only if we have valid cells
header_row = rows[0].find_all(['th', 'td'])
header = '| ' + ' | '.join(cell.get_text().strip().ljust(max_widths[i])
for i, cell in enumerate(header_row)) + ' |'
separator = '|' + '|'.join('-' * (width + 2) for width in max_widths) + '|'
# Make sure every row has the same number of columns
for row in table_matrix:
while len(row) < max_cols:
row.append('')
# Compute the maximum width of each column
col_widths = [0] * max_cols
for row in table_matrix:
for col_idx, cell in enumerate(row):
if cell is not None:
col_widths[col_idx] = max(col_widths[col_idx], len(str(cell)))
# Generate the Markdown table
markdown_table = []
# Header
if table_matrix:
header = '|'
for col_idx, width in enumerate(col_widths):
cell = str(table_matrix[0][col_idx] or '')
header += f' {cell.ljust(width)} |'
markdown_table.append(header)
# Separator
separator = '|'
for width in col_widths:
separator += '-' * (width + 2) + '|'
markdown_table.append(separator)
for row in rows[1:]:
cells = row.find_all(['td', 'th'])
row_text = '| ' + ' | '.join(cell.get_text().strip().ljust(max_widths[i])
for i, cell in enumerate(cells)) + ' |'
# Body rows
for row_idx in range(1, len(table_matrix)):
row_text = '|'
for col_idx, width in enumerate(col_widths):
cell = str(table_matrix[row_idx][col_idx] or '')
row_text += f' {cell.ljust(width)} |'
markdown_table.append(row_text)
table.replace_with(soup.new_string('\n' + '\n'.join(markdown_table)))
# Replace the HTML table with the Markdown version
if markdown_table:
table.replace_with(soup.new_string('\n' + '\n'.join(markdown_table) + '\n'))
except Exception as e:
print(f"Error procesando tabla: {str(e)}")
continue
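The key move above is the first pass: the HTML table is expanded into a rectangular matrix in which a cell with rowspan/colspan claims every position it covers, so column widths can later be measured position by position. A standalone sketch of that expansion, using the same BeautifulSoup calls as the script:

from bs4 import BeautifulSoup

# Expand rowspan/colspan into a rectangular matrix; merged cells become "".
html = '<table><tr><th rowspan="2">A</th><th>B</th></tr><tr><td>C</td></tr></table>'
rows = BeautifulSoup(html, "html.parser").find_all("tr")
matrix = []
for r, row in enumerate(rows):
    while len(matrix) <= r:
        matrix.append([])
    col = 0
    for cell in row.find_all(["th", "td"]):
        while col < len(matrix[r]) and matrix[r][col] is not None:
            col += 1  # skip positions already claimed by an earlier rowspan
        rowspan = int(cell.get("rowspan", 1))
        colspan = int(cell.get("colspan", 1))
        for dr in range(rowspan):
            while len(matrix) <= r + dr:
                matrix.append([])
            while len(matrix[r + dr]) <= col + colspan - 1:
                matrix[r + dr].append(None)
            for dc in range(colspan):
                text = cell.get_text().strip() if dr == 0 and dc == 0 else ""
                matrix[r + dr][col + dc] = text
        col += colspan

print(matrix)  # [['A', 'B'], ['', 'C']]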

View File

@@ -1,3 +1,11 @@
{
"path": "C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails"
"path": "C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\EmailTody",
"history": [
"C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\EmailTody",
"C:\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Reporte\\Emails",
"C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\30 - 9.3941- Kosme - Portogallo (Modifica + Linea)\\Emails",
"C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\Emails\\Trial",
"C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\Emails",
"C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\Emails\\Error de tablas"
]
}

View File

@@ -1,3 +1,6 @@
{
"path": "C:/Estudio"
"path": "C:\\Estudio",
"history": [
"C:\\Estudio"
]
}

View File

@@ -0,0 +1,3 @@
{
}

View File

@@ -0,0 +1,6 @@
{
"name": "RAGEX",
"description": "This script processes text files into a chronological narrative in Markdown format, optimized for processing with Large Language Models (LLMs). It extracts essential information from text files while removing unnecessary metadata, creating a clean, temporal narrative that can be easily analyzed.",
"version": "1.0",
"author": "Miguel"
}

View File

@@ -0,0 +1,3 @@
{
}

View File

@@ -0,0 +1,21 @@
{
"type": "object",
"properties": {
"in_dir": {
"type": "string",
"format": "directory",
"title": "Subdirectorio desde donde hacer la ingesta de los datos",
"description": "Subdirectorio desde donde hacer la ingesta de los datos"
},
"model": {
"type": "string",
"title": "Model",
"description": "OpenAI Model"
},
"query": {
"type": "string",
"title": "Consulta",
"description": "Consulta"
}
}
}
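An instance of this schema, as it would be stored in the group's level 3 data file, might look like the following (all three values are illustrative, not taken from this commit):

{
"in_dir": "TEST",
"model": "gpt-3.5-turbo",
"query": "What do the ingested documents say about the OPC-UA interface?"
}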

View File

@@ -0,0 +1,3 @@
# Set your OpenAI API key
def openai_api_key():
return 'sk-HIY5DSK03Lr'

View File

@@ -0,0 +1,6 @@
{
"path": "D:\\Proyectos\\Scripts\\RAG\\TEST",
"history": [
"D:\\Proyectos\\Scripts\\RAG\\TEST"
]
}

View File

@@ -0,0 +1,112 @@
"""
This script ingests the data stored in the ingestion subdirectory.
"""
import os
import sys
from pathlib import Path
import json
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings # Switched to OpenAI embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
import glob
def load_documents(directory):
documents = []
# Load markdown files
for md_file in glob.glob(os.path.join(directory, "**/*.md"), recursive=True):
with open(md_file, "r", encoding="utf-8") as f:
content = f.read()
documents.append(
{
"content": content,
"metadata": {"source": md_file, "type": "markdown"},
}
)
# Load plain-text files
for txt_file in glob.glob(os.path.join(directory, "**/*.txt"), recursive=True):
with open(txt_file, "r", encoding="utf-8") as f:
content = f.read()
documents.append(
{"content": content, "metadata": {"source": txt_file, "type": "text"}}
)
return documents
def process_documents(documents, db_directory):
# Use OpenAI embeddings instead of HuggingFace
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
separators=["\n## ", "\n### ", "\n#### ", "\n", " ", ""],
keep_separator=True,
)
docs = []
for doc in documents:
chunks = text_splitter.split_text(doc["content"])
for i, chunk in enumerate(chunks):
docs.append(
Document(
page_content=chunk,
metadata={
**doc["metadata"],
"chunk_id": i,
"chunk": chunk[:100] + "...", # Excerpt for reference
},
)
)
# Configure Chroma to avoid the ONNX dependency
from chromadb.config import Settings
# Create or update the vector database with explicit settings
db = Chroma.from_documents(
docs,
embeddings,
persist_directory=db_directory,
client_settings=Settings(anonymized_telemetry=False, is_persistent=True),
)
db.persist()
print(f"Procesados {len(docs)} fragmentos de {len(documents)} documentos")
return db
def main():
# Load configuration from the environment
configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}"))
# Get the working directory
working_directory = configs.get("working_directory", ".")
# Get level 2 (group) and level 3 settings
group_config = configs.get("level2", {})
work_config = configs.get("level3", {})
in_dir = work_config.get("in_dir", ".")
docs_directory = os.path.join(working_directory, in_dir)
db_directory = os.path.join(working_directory, "chroma_db")
print("Cargando documentos...")
documents = load_documents(docs_directory)
print(f"Se encontraron {len(documents)} documentos.")
print("Procesando e indexando documentos...")
db = process_documents(documents, db_directory)
print("¡Ingesta completada con éxito!")
if __name__ == "__main__":
main()
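The script takes all of its parameters from the SCRIPT_CONFIGS environment variable that the frontend injects. A hedged sketch of driving it by hand, assuming the ingestion script is saved as x1.py (the query script's comment refers to it by that name) and reusing the test directory from the work_dir.json above; the "docs" subdirectory is hypothetical:

import json, os, subprocess

os.environ["SCRIPT_CONFIGS"] = json.dumps({
    "working_directory": r"D:\Proyectos\Scripts\RAG\TEST",  # illustrative path
    "level3": {"in_dir": "docs"},                           # hypothetical subdirectory
})
# OpenAIEmbeddings also expects OPENAI_API_KEY to be present in the environment.
subprocess.run(["python", "x1.py"], check=True)  # writes the index to ./chroma_db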

View File

@@ -0,0 +1,126 @@
"""
This script runs a RAGEX query against the document database.
"""
import os
import sys
from pathlib import Path
import json
from langchain_community.vectorstores import Chroma
from langchain_openai import (
OpenAIEmbeddings,
) # Switched from HuggingFaceEmbeddings to OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from rich.console import Console
from rich.markdown import Markdown
import argparse
from openai_api_key import openai_api_key
console = Console()
class CitationTracker:
def __init__(self):
self.citations = []
def add_citation(self, text, metadata):
self.citations.append({"text": text, "metadata": metadata})
def get_formatted_citations(self):
result = "\n## Fuentes\n\n"
for i, citation in enumerate(self.citations, 1):
source = citation["metadata"]["source"]
result += f"{i}. [{os.path.basename(source)}]({source}) - Fragmento {citation['metadata']['chunk_id']}\n"
return result
def search_with_citation(query, db_directory, model="gpt-3.5-turbo"):
# Load the embeddings and the vector database
embeddings = OpenAIEmbeddings(
model="text-embedding-3-small"
) # Use OpenAI embeddings, the same as in x1.py
db = Chroma(persist_directory=db_directory, embedding_function=embeddings)
api_key = openai_api_key()
os.environ["OPENAI_API_KEY"] = api_key
# Configure the OpenAI LLM
llm = ChatOpenAI(model_name=model)
# Citation tracker
citation_tracker = CitationTracker()
# Retrieve relevant documents
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
# Prompt template
template = """
Responde a la siguiente pregunta basándote exclusivamente en la información proporcionada.
Incluye referencias a las fuentes originales para cada afirmación importante usando [Fuente N].
Si la información no es suficiente, indícalo claramente.
Contexto:
{context}
Pregunta: {question}
Respuesta (incluye [Fuente N] para citar):
"""
prompt = ChatPromptTemplate.from_template(template)
# Helper to format the retrieved context
def format_docs(docs):
formatted_context = ""
for i, doc in enumerate(docs, 1):
citation_tracker.add_citation(doc.page_content, doc.metadata)
formatted_context += f"[Fuente {i}]: {doc.page_content}\n\n"
return formatted_context
# RAG chain
rag_chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)
# Run the search
response = rag_chain.invoke(query)
# Append the citations at the end
full_response = response + "\n\n" + citation_tracker.get_formatted_citations()
return full_response
def main():
# Load configuration from the environment
configs = json.loads(os.environ.get("SCRIPT_CONFIGS", "{}"))
# Get the working directory
working_directory = configs.get("working_directory", ".")
# Get level 2 (group) and level 3 settings
group_config = configs.get("level2", {})
work_config = configs.get("level3", {})
in_dir = work_config.get("in_dir", ".")
docs_directory = os.path.join(working_directory, in_dir)
model = work_config.get("model", "gpt-3.5-turbo")
query = work_config.get("query", "")
db_directory = os.path.join(working_directory, "chroma_db")
result = search_with_citation(query, db_directory, model)
console.print(Markdown(result))
if __name__ == "__main__":
main()
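search_with_citation can also be called directly once an index exists under chroma_db. A hedged sketch; "x2" is a hypothetical module name for this query script, and the db path reuses the test directory from this commit:

# "x2" is a hypothetical module name for the query script above.
from x2 import search_with_citation

result = search_with_citation(
    query="What do the documents say about batch data?",   # illustrative query
    db_directory=r"D:\Proyectos\Scripts\RAG\TEST\chroma_db",
    model="gpt-3.5-turbo",
)
print(result)  # answer with [Fuente N] markers plus the "## Fuentes" list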

View File

@@ -454,6 +454,9 @@ class ConfigurationManager:
with open(work_dir_path, "r") as f:
data = json.load(f)
path = data.get("path", "")
# Normalize path separators
if path:
path = os.path.normpath(path)
# Update the instance variable if there is a valid path
if path and os.path.exists(path):
self.working_directory = path
@@ -462,16 +465,45 @@ class ConfigurationManager:
return ""
def set_work_dir(self, group: str, path: str) -> Dict[str, str]:
"""Set working directory path for a script group."""
"""Set working directory path for a script group and update history."""
# Normalize the incoming path
path = os.path.normpath(path)
if not os.path.exists(path):
return {"status": "error", "message": "Directory does not exist"}
work_dir_path = os.path.join(self.script_groups_path, group, "work_dir.json")
try:
# Save the path to work_dir.json
# Load existing data or create a new structure
try:
with open(work_dir_path, "r") as f:
data = json.load(f)
# Normalize the paths already stored in the history
if "history" in data:
data["history"] = [os.path.normpath(p) for p in data["history"]]
except (FileNotFoundError, json.JSONDecodeError):
data = {"path": "", "history": []}
# Update the current path
data["path"] = path
# Update the history
if "history" not in data:
data["history"] = []
# Remove the path from the history if it is already there (compare normalized paths)
data["history"] = [p for p in data["history"] if os.path.normpath(p) != path]
# Insert the path at the front of the history
data["history"].insert(0, path)
# Keep only the 10 most recent directories
data["history"] = data["history"][:10]
# Save the updated data
with open(work_dir_path, "w") as f:
json.dump({"path": path}, f, indent=2)
json.dump(data, f, indent=2)
# Update the instance variable
self.working_directory = path
@@ -485,3 +517,16 @@ class ConfigurationManager:
return {"status": "success", "path": path}
except Exception as e:
return {"status": "error", "message": str(e)}
def get_directory_history(self, group: str) -> List[str]:
"""Get the directory history for a script group."""
work_dir_path = os.path.join(self.script_groups_path, group, "work_dir.json")
try:
with open(work_dir_path, "r") as f:
data = json.load(f)
# Normalize every path in the history
history = [os.path.normpath(p) for p in data.get("history", [])]
# Keep only directories that still exist
return [p for p in history if os.path.exists(p)]
except (FileNotFoundError, json.JSONDecodeError):
return []
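Taken together, set_work_dir and get_directory_history maintain a small most-recently-used list: normalize the path, drop any previous occurrence, insert it at the front, and cap the list at 10 entries. The same policy in isolation, as a minimal sketch:

import os

def push_recent(history, path, limit=10):
    # MRU update: normalize, deduplicate, prepend, truncate.
    path = os.path.normpath(path)
    history = [p for p in history if os.path.normpath(p) != path]
    return [path] + history[:limit - 1]

dirs = []
for d in [r"C:\Estudio", r"C:\Trabajo", r"C:\Estudio"]:
    dirs = push_recent(dirs, d)
print(dirs)  # ['C:\\Estudio', 'C:\\Trabajo'] -- re-adding a path moves it to the front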

View File

@@ -1,116 +1,8 @@
[09:40:16] Iniciando ejecución de x1.py
[09:40:18] Working directory: C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails
[09:40:18] Input directory: C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails
[09:40:18] Output directory: C:/Users/migue/OneDrive/Miguel/Obsidean/Trabajo/VM/04-InLavoro/HENKEL/93040 - HENKEL - BowlingGreen/Description/HENKEL - ALPLA - AUTEFA - Batch Data
[09:40:18] Cronologia file: C:/Users/migue/OneDrive/Miguel/Obsidean/Trabajo/VM/04-InLavoro/HENKEL/93040 - HENKEL - BowlingGreen/Description/HENKEL - ALPLA - AUTEFA - Batch Data\cronologia.md
[09:40:18] Attachments directory: C:/Trabajo/VM/40 - 93040 - HENKEL - NEXT2 Problem/Reporte/Emails\adjuntos
[09:40:18] Beautify rules file: D:\Proyectos\Scripts\ParamManagerScripts\backend\script_groups\EmailCrono\config\beautify_rules.json
[09:40:18] Found 13 .eml files
[09:40:18] Loaded 0 existing messages
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ 9.3040-3074 ALPLA BG open points.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ 9.3061-TLO26-L42 automatic change over test.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ Alpla BOW2 - Line emptying button 6168.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ IDH_BTL.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ OPC-UA interface Vetro - Bowling Green 2 9.3040-3074.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\I_ R_ I_ [EXT] R_ Vetro Conveyor 9.3674.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\NEXT2 - Data - ALPLA information verification.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RE_ Automatic changeover trial.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RE_ OPC-UA interface Vetro - Bowling Green 2 9.3040-3074.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RE_ [EXT] RE_ Vetro_ALPLA information verification.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\Re_ _EXT_ Next + 1 - HENKEL - ALPLA - AUTEFA.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\RV_ RE_ [EXT] RE_ Vetro_ALPLA information verification.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Processing C:\Trabajo\VM\40 - 93040 - HENKEL - NEXT2 Problem\Reporte\Emails\R_ [EXT] RE_ Vetro_ALPLA information verification 9.3060-3067.eml
[09:40:18] Aplicando reglas de prioridad 1
[09:40:18] Aplicando reglas de prioridad 2
[09:40:18] Aplicando reglas de prioridad 3
[09:40:18] Aplicando reglas de prioridad 4
[09:40:18] Estadísticas de procesamiento:
[09:40:18] - Total mensajes encontrados: 38
[09:40:18] - Mensajes únicos añadidos: 22
[09:40:18] - Mensajes duplicados ignorados: 16
[09:40:18] Writing 22 messages to C:/Users/migue/OneDrive/Miguel/Obsidean/Trabajo/VM/04-InLavoro/HENKEL/93040 - HENKEL - BowlingGreen/Description/HENKEL - ALPLA - AUTEFA - Batch Data\cronologia.md
[09:40:18] Ejecución completada
[20:41:53] Iniciando ejecución de x1.py
[20:41:57] Cargando documentos...
[20:41:57] Se encontraron 1 documentos.
[20:41:57] Procesando e indexando documentos...
[20:44:57] Iniciando ejecución de x1.py
[20:45:01] Cargando documentos...
[20:45:01] Se encontraron 1 documentos.
[20:45:01] Procesando e indexando documentos...

View File

@@ -0,0 +1,3 @@
# Set your OpenAI API key
def openai_api_key():
return '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'

View File

@@ -6,11 +6,11 @@ from openai import OpenAI
from typing import Dict, List
import json
from .base import LLMService
from config.api_keys import APIKeyManager
from openai_api_key import openai_api_key
class OpenAIService(LLMService):
def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3):
api_key = APIKeyManager.get_openai_key()
api_key = openai_api_key()
if not api_key:
raise ValueError("OpenAI API key not found. Please set up your API keys.")
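This hunk swaps the key lookup from APIKeyManager to the new top-level openai_api_key helper added in this commit. A minimal stand-in for the pattern (not the real class body, which is only partially shown here):

from openai import OpenAI
from openai_api_key import openai_api_key

api_key = openai_api_key()
if not api_key:
    raise ValueError("OpenAI API key not found. Please set up your API keys.")
client = OpenAI(api_key=api_key)  # the service presumably wraps a client like this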

View File

@@ -535,6 +535,7 @@ async function setWorkingDirectory() {
await updateWorkingDirectory(path);
}
// Modify initWorkingDirectory to also load the history
async function initWorkingDirectory() {
if (!currentGroup) return;
@@ -543,6 +544,7 @@ async function initWorkingDirectory() {
if (result.status === 'success' && result.path) {
await updateWorkingDirectory(result.path);
}
await loadDirectoryHistory();
}
async function browseDirectory() {
@@ -565,28 +567,64 @@ async function browseDirectory() {
async function updateWorkingDirectory(path) {
console.log('Updating working directory:', { path, group: currentGroup }); // Debug line
const response = await fetch('/api/working-directory', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
path: path,
group: currentGroup
})
});
const result = await response.json();
console.log('Update result:', result); // Debug line
if (result.status === 'success') {
// Update the input
document.getElementById('working-directory').value = path;
try {
const response = await fetch('/api/working-directory', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
path: path,
group: currentGroup
})
});
// Reload the level 3 configuration
const configResponse = await fetch(`/api/config/3?group=${currentGroup}`);
const data = await configResponse.json();
await renderForm('level3-form', data);
} else {
alert('Error: ' + (result.message || 'No se pudo actualizar el directorio de trabajo'));
const result = await response.json();
console.log('Update result:', result); // Debug line
if (result.status === 'success') {
// Update the input and the directory list
document.getElementById('working-directory').value = path;
await loadDirectoryHistory();
// Reload the level 3 configuration
const configResponse = await fetch(`/api/config/3?group=${currentGroup}`);
const data = await configResponse.json();
await renderForm('level3-form', data);
} else {
alert('Error: ' + (result.message || 'No se pudo actualizar el directorio de trabajo'));
}
} catch (error) {
console.error('Error updating working directory:', error);
alert('Error actualizando el directorio de trabajo: ' + error.message);
}
}
async function loadDirectoryHistory() {
try {
const response = await fetch(`/api/directory-history/${currentGroup}`);
const history = await response.json();
const select = document.getElementById('directory-history');
select.innerHTML = '<option value="">-- Directorios recientes --</option>';
history.forEach(dir => {
const option = document.createElement('option');
option.value = dir;
option.textContent = dir;
// Mark it as selected if it is the current directory
if (dir === document.getElementById('working-directory').value) {
option.selected = true;
}
select.appendChild(option);
});
} catch (error) {
console.error('Error loading directory history:', error);
}
}
function loadHistoryDirectory(path) {
if (path) {
document.getElementById('working-directory').value = path;
updateWorkingDirectory(path); // Changed from setWorkingDirectory to updateWorkingDirectory
}
}
@@ -657,6 +695,16 @@ async function initializeApp() {
await initWorkingDirectory();
await loadConfigs();
// Show level3-content by default
const level3Content = document.getElementById('level3-content');
if (level3Content) {
level3Content.classList.remove('hidden');
const button = document.querySelector(`[onclick="toggleConfig('level3-content')"]`);
if (button) {
button.innerText = 'Ocultar Configuración';
}
}
} catch (error) {
console.error('Error during initialization:', error);
}

View File

@@ -109,6 +109,12 @@
Confirmar
</button>
</div>
<!-- Add directory history dropdown -->
<div class="mt-2">
<select id="directory-history" class="w-full p-2 border rounded text-gray-600" onchange="loadHistoryDirectory(this.value)">
<option value="">-- Directorios recientes --</option>
</select>
</div>
</div>
<!-- Level 3 Configuration -->
@@ -117,10 +123,10 @@
<h2 class="text-xl font-bold">Configuración del Directorio</h2>
<button class="bg-blue-500 text-white px-4 py-2 rounded"
onclick="toggleConfig('level3-content')">
Mostrar Configuración
Ocultar Configuración
</button>
</div>
<div id="level3-content" class="hidden">
<div id="level3-content">
<div id="level3-form"></div>
<div class="flex justify-end mt-4">
<button class="bg-blue-500 text-white px-4 py-2 rounded" onclick="modifySchema(3)">