Primera Version

2025-04-25 22:44:51 +02:00 · 2025-04-25 22:44:51 +02:00 · ba35b08017
commit ba35b08017
4 changed files with 512 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,178 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Media files (input videos and temporary WAV conversions)
+*.wav
+*.mp4
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
--- a/readme.md
+++ b/readme.md
@ -0,0 +1,124 @@
+# Cliente Python para Whisper ASR Webservice Local
+
+Este proyecto contiene un script de Python diseñado para interactuar con una instancia local del [Whisper ASR Webservice](https://github.com/ahmetoner/whisper-asr-webservice) (u otro compatible) corriendo en Docker. El script puede tomar un archivo de audio o video como entrada, extraer/convertir el audio a formato WAV, enviarlo al servicio para transcripción, y mostrar los resultados detallados, incluyendo el tiempo total empleado.
+
+## Descripción
+
+El script automatiza el proceso de:
+1.  Tomar un archivo de entrada (probado con MP4, pero adaptable a otros formatos soportados por MoviePy/FFmpeg).
+2.  Convertir el audio del archivo de entrada a formato WAV (`pcm_s16le`), necesario o preferido por muchos sistemas ASR. Utiliza `moviepy` (que a su vez usa FFmpeg) para esta conversión.
+3.  Comunicarse con el endpoint `/asr` de un servicio Whisper ASR Webservice local.
+4.  Enviar el archivo WAV al servicio, especificando parámetros como la tarea (`transcribe` o `translate`), idioma (o autodetección), y formato de salida esperado (`json`).
+5.  Manejar la comunicación HTTP, enviando los parámetros de control en la URL (como requiere la API consultada) y estableciendo cabeceras adecuadas (`Accept: application/json`).
+6.  Interpretar la respuesta del servidor, incluso si este devuelve un `Content-Type` incorrecto (como `text/plain`) pero el cuerpo contiene JSON válido.
+7.  Mostrar un resumen de la comunicación (URL, estado, cabeceras, cuerpo de respuesta raw).
+8.  Presentar la transcripción final de forma clara, incluyendo idioma detectado, texto completo y segmentos individuales con marcas de tiempo.
+9.  Medir y mostrar el tiempo total de ejecución del proceso.
+
+## Características Principales
+
+* Soporte para múltiples formatos de entrada (dependiente de FFmpeg).
+* Conversión de audio a WAV estandarizado.
+* Comunicación específica adaptada a la API del Webservice (parámetros en URL).
+* Interpretación robusta de la respuesta JSON (incluso con `Content-Type` incorrecto).
+* Visualización detallada de la transcripción (texto completo y segmentos).
+* Medición del tiempo de conversión, llamada API y total.
+* Salida informativa del proceso y la comunicación.
+
+## Requisitos Previos
+
+1.  **Python:** Versión 3.9 o superior (requerido por MoviePy 2.x, la versión cuya API utiliza el script).
+2.  **Librerías Python:**
+    * `requests`: Para realizar las llamadas HTTP.
+    * `moviepy`: Para la manipulación y conversión de audio/video.
+3.  **FFmpeg:** **Esencial.** MoviePy depende de FFmpeg para leer la mayoría de los formatos de audio/video (incluyendo MP4) y para realizar las conversiones. Debes [descargar e instalar FFmpeg](https://ffmpeg.org/download.html) y asegurarte de que el ejecutable `ffmpeg` esté accesible en el PATH de tu sistema. Puedes verificarlo ejecutando `ffmpeg -version` en tu terminal.
+4.  **Instancia de Whisper ASR Webservice:** El script está diseñado para conectarse a un servicio **local**, típicamente corriendo en **Docker**. Debes tener una instancia funcionando y accesible desde la máquina donde ejecutas el script. El ejemplo usa la implementación de `ahmetoner/whisper-asr-webservice`, pero podría funcionar con otras APIs similares.
+
+## Instalación
+
+1.  **Clona o descarga** este repositorio/script.
+2.  **Crea y activa un entorno virtual** (recomendado):
+    * Usando `venv`:
+        ```bash
+        python -m venv .venv
+        # En Windows:
+        .\.venv\Scripts\activate
+        # En Linux/macOS:
+        source .venv/bin/activate
+        ```
+    * Usando `conda`:
+        ```bash
+        conda create -n whisper_env python=3.9 # O la versión que prefieras >= 3.9
+        conda activate whisper_env
+        ```
+3.  **Instala las dependencias de Python:**
+    ```bash
+    pip install requests moviepy
+    ```
+4.  **Instala FFmpeg:** Sigue las instrucciones en [ffmpeg.org](https://ffmpeg.org/download.html) para tu sistema operativo. Verifica que esté en el PATH.
+5.  **Configura y ejecuta tu contenedor Docker** de Whisper ASR Webservice. Consulta la documentación de la imagen Docker que estés usando. Asegúrate de conocer la dirección IP y el puerto donde está escuchando (p. ej., `192.168.88.26:9005`).
+
+## Configuración del Script
+
+Abre el script de Python (`x1.py`) y modifica las variables en la sección `# --- Configuración ---` según tus necesidades:
+
+* `WHISPER_API_URL`: **¡Importante!** Cambia esto a la URL base de tu servicio Docker (p. ej., `"http://TU_IP_NAS:PUERTO/asr"`).
+* `INPUT_MP4_PATH`: Ruta a tu archivo de video o audio de entrada.
+* `TEMP_WAV_BASENAME`: Nombre base para los archivos WAV temporales que se crean.
+* `API_LANGUAGE`: Código del idioma del audio (p.ej., `'es'`, `'en'`) o déjalo como `None` para que Whisper intente detectarlo automáticamente.
+* `API_TASK`: La tarea a realizar. `'transcribe'` (audio a texto en el mismo idioma) o `'translate'` (audio a texto en Inglés).
+* `API_OUTPUT_FORMAT`: Formato de salida solicitado (probablemente `'json'` para obtener detalles como segmentos).
+* `API_ENCODE`: Valor del parámetro `encode` de la API, que indica al servicio que procese primero el audio con FFmpeg (generalmente `True`).
+
+## Uso
+
+1.  Asegúrate de que tu entorno virtual (si usas uno) esté activado.
+2.  Asegúrate de que tu contenedor Docker de Whisper ASR Webservice esté corriendo y sea accesible en la URL configurada.
+3.  Ejecuta el script desde la terminal:
+    ```bash
+    python tu_script.py
+    ```
+    (Reemplaza `tu_script.py` con el nombre real de tu archivo).
+
+El script mostrará mensajes indicando el progreso (conversión, envío a la API), detalles de la comunicación HTTP, y finalmente el resultado de la transcripción interpretado y el tiempo total empleado.
+
+## Ejemplo de Salida (Sección Final)
+
+```text
+Limpiando archivo temporal 'd:\Proyectos\Scripts\Whisper\from_docker\temp_audio_direct_conversion_1745613564.wav'...
+Limpieza completada.
+
+--- Resultado Final Interpretado ---
+¡Respuesta interpretada como JSON exitosamente!
+
+Idioma Detectado: en
+
+Transcripción Completa:
+-------------------------
+ Listen and match. 1. Hello, my name's Max. I'm from Germany. In Germany, we hang Easter eggs in the trees. In my garden, there's a tree with eggs. Yellow, green, blue, orange, pink and purple. 2. Hi, my name's Daria. I'm from Ukraine. Easter eggs in Ukraine are lots of colors. There are patterns with triangles, circles and stars. They're beautiful. 3. Hi, my name's Adam. I'm from Hungary. In Hungary, we paint Easter eggs. We decorate the eggs with flowers. The flowers are all different colors. 4. Hi, my name's Sophia. I'm from Greece. Easter eggs in Greece are red. 5. We dye the eggs. No patterns, just red eggs.
+-------------------------
+
+Segmentos Detallados:
+  [00:00:00.000 --> 00:00:22.000] Listen and match. 1. Hello, my name's Max. I'm from Germany. In Germany, we hang Easter
+  [00:00:22.000 --> 00:00:40.000] eggs in the trees. In my garden, there's a tree with eggs. Yellow, green, blue, orange, pink and purple.
+  [00:00:40.000 --> 00:00:51.000] 2. Hi, my name's Daria. I'm from Ukraine. Easter eggs in Ukraine are lots of colors.
+  [00:00:51.000 --> 00:01:02.000] There are patterns with triangles, circles and stars. They're beautiful.
+  [00:01:02.000 --> 00:01:18.000] 3. Hi, my name's Adam. I'm from Hungary. In Hungary, we paint Easter eggs. We decorate the eggs with flowers.
+  [00:01:18.000 --> 00:01:38.000] The flowers are all different colors. 4. Hi, my name's Sophia. I'm from Greece. Easter eggs in Greece are red.
+  [00:01:38.000 --> 00:01:46.000] 5. We dye the eggs. No patterns, just red eggs.
+----------------------------------
+
+--- Tiempo Total Empleado: 150.62 segundos (00:02:30.623) ---
+```
--- a/x1.py
+++ b/x1.py
@ -0,0 +1,207 @@
+import requests
+import os
+import time
+import traceback
+import json # Necesitamos importar json para cargar la cadena
+
+# Importación para MoviePy v2.x
+from moviepy import AudioFileClip
+
+# --- Configuración ---
+WHISPER_API_URL = "http://192.168.88.26:9005/asr"  # Endpoint that receives the POST with the audio file
+INPUT_MP4_PATH = "test.mp4"  # Input media file whose audio is converted to WAV
+TEMP_WAV_BASENAME = "temp_audio_direct_conversion"  # Prefix of the temporary WAV name (a timestamp is appended)
+API_LANGUAGE = None  # Falsy value: no 'language' URL parameter is sent
+API_TASK = 'transcribe'  # Sent as the 'task' URL parameter
+API_OUTPUT_FORMAT = 'json' # Sent as the 'output' URL parameter; still requested even if the server ignores it
+API_ENCODE = True  # Sent as the 'encode' URL parameter, lower-cased ('true'/'false')
+# --- Fin de la Configuración ---
+
+# La función convert_mp4_to_wav se mantiene igual
+def convert_mp4_to_wav(input_path, output_wav_path):
+    # (Código omitido por brevedad - idéntico a la respuesta anterior)
+    print(f"Intentando convertir '{os.path.basename(input_path)}' a WAV...")
+    print(f"  Entrada: {input_path}")
+    print(f"  Salida: {output_wav_path}")
+    audio_clip = None
+    try:
+        audio_clip = AudioFileClip(input_path)
+        audio_clip.write_audiofile(output_wav_path, codec='pcm_s16le', logger=None)
+        print("Conversión a WAV exitosa.")
+        return output_wav_path
+    except Exception as e:
+        print(f"ERROR durante la conversión a WAV: {type(e).__name__} - {e}")
+        return None
+    finally:
+        if audio_clip:
+            try: audio_clip.close()
+            except Exception: pass
+
+
+def transcribe_audio_api(api_url_base, file_path, encode=True, task='transcribe', language=None, output_format='json'):
+    """
+    Envía audio, muestra detalles y INTENTA interpretar la respuesta como JSON
+    incluso si el Content-Type es incorrecto.
+    """
+    print(f"\n--- Enviando Audio a la API Whisper ---")
+    print(f"Archivo: '{os.path.basename(file_path)}'")
+    url_params = {'encode': str(encode).lower(), 'task': task, 'output': output_format}
+    if language: url_params['language'] = language
+    print(f"Parámetros para URL: {url_params}")
+
+    filename = os.path.basename(file_path)
+    f = None
+    try:
+        f = open(file_path, 'rb')
+        files_payload = {'audio_file': (filename, f, 'audio/wav')}
+        request_headers = {'Accept': 'application/json'}
+        print(f"Cabeceras explícitas: {request_headers}")
+        print(f"Realizando POST a: {api_url_base}")
+
+        response = requests.post(
+            api_url_base, params=url_params, files=files_payload,
+            headers=request_headers, timeout=300
+        )
+
+        print("\n--- Detalles de la Comunicación ---")
+        print(f"URL Final Enviada: {response.request.url}")
+        print(f"Código de Estado Recibido: {response.status_code} ({response.reason})")
+        received_content_type = response.headers.get('Content-Type', 'N/A')
+        print(f"Content-Type Recibido: {received_content_type}")
+        print("\n--- Cuerpo de la Respuesta (Raw Text) ---")
+        response_text = ""
+        try:
+            response_text = response.content.decode('utf-8', errors='replace')
+            # Mostramos solo una parte si es muy largo, para no saturar
+            preview_limit = 500
+            print(response_text[:preview_limit] + ('...' if len(response_text) > preview_limit else ''))
+        except Exception as decode_err:
+            print(f"[Error al decodificar: {decode_err}] Bytes: {response.content[:preview_limit]}...")
+        print("-----------------------------------")
+
+        response.raise_for_status() # Verificar errores 4xx/5xx
+
+        # --- Interpretación Mejorada de la Respuesta ---
+        if response.status_code == 200 and response_text.strip():
+            # Advertir si el Content-Type no es JSON pero intentaremos igual
+            if not received_content_type.startswith('application/json'):
+                print("\nADVERTENCIA: Content-Type no es JSON, pero se intentará interpretar el cuerpo.")
+
+            try:
+                # Intentar cargar el texto como JSON
+                data = json.loads(response_text)
+                print("Interpretación como JSON exitosa.")
+                return data # Devolver el diccionario Python
+            except json.JSONDecodeError as json_err:
+                print(f"ERROR: No se pudo interpretar la respuesta como JSON: {json_err}")
+                print("Devolviendo la respuesta como texto plano.")
+                return response_text # Devolver texto si falla la interpretación
+        else:
+             # Respuesta vacía o código de error no capturado por raise_for_status
+            print("Respuesta recibida vacía o con estado inesperado (no 200 OK).")
+            return response_text # Devolver el texto (probablemente vacío)
+
+    # ... (resto del manejo de excepciones de requests igual que antes) ...
+    except requests.exceptions.Timeout: print("ERROR: Timeout.")
+    except requests.exceptions.ConnectionError as e: print(f"ERROR de Conexión: {e}")
+    except requests.exceptions.HTTPError as e: print(f"ERROR HTTP {e.response.status_code}.")
+    except requests.exceptions.RequestException as e: print(f"ERROR de Request: {e}")
+    except Exception as e: print(f"ERROR inesperado en API: {type(e).__name__} - {e}")
+    finally:
+        if f:
+            try: f.close()
+            except Exception: pass
+    return None
+
+# --- Función para Formatear Tiempo ---
+def format_time(seconds):
+    """Convierte segundos a formato HH:MM:SS.mmm"""
+    millis = int(seconds * 1000) % 1000
+    total_seconds = int(seconds)
+    secs = total_seconds % 60
+    mins = (total_seconds // 60) % 60
+    hours = total_seconds // 3600
+    return f"{hours:02}:{mins:02}:{secs:02}.{millis:03}"
+
+# --- Ejecución Principal ---
+if __name__ == "__main__":
+    print("--- Iniciando Script Mejorado ---")
+    overall_start_time = time.perf_counter()
+
+    temp_wav_file_path = os.path.join(
+        os.path.dirname(__file__) or '.',
+        f"{TEMP_WAV_BASENAME}_{int(time.time())}.wav"
+    )
+    transcription_result_data = None
+    prepared_audio_path = None
+
+    # 1. Verificar archivo de entrada
+    if not os.path.exists(INPUT_MP4_PATH):
+        print(f"Error Crítico: El archivo de entrada '{INPUT_MP4_PATH}' no existe.")
+    else:
+        # 2. Convertir a WAV
+        conv_start_time = time.perf_counter()
+        prepared_audio_path = convert_mp4_to_wav(INPUT_MP4_PATH, temp_wav_file_path)
+        conv_end_time = time.perf_counter()
+        if prepared_audio_path:
+            print(f"(Tiempo de conversión: {conv_end_time - conv_start_time:.2f} segundos)")
+
+            # 3. Transcribir si la conversión fue exitosa
+            if os.path.exists(prepared_audio_path):
+                api_start_time = time.perf_counter()
+                transcription_result_data = transcribe_audio_api(
+                    api_url_base=WHISPER_API_URL,
+                    file_path=prepared_audio_path,
+                    encode=API_ENCODE,
+                    task=API_TASK,
+                    language=API_LANGUAGE,
+                    output_format=API_OUTPUT_FORMAT
+                )
+                api_end_time = time.perf_counter()
+                print(f"(Tiempo de llamada API: {api_end_time - api_start_time:.2f} segundos)")
+            else:
+                print(f"ERROR: El archivo WAV convertido '{prepared_audio_path}' no se encontró.")
+        else:
+            print("La conversión a WAV falló. No se puede continuar.")
+
+    # 4. Limpieza
+    if os.path.exists(temp_wav_file_path):
+        try:
+            print(f"\nLimpiando archivo temporal '{temp_wav_file_path}'...")
+            os.remove(temp_wav_file_path)
+            print("Limpieza completada.")
+        except OSError as e:
+            print(f"Error al eliminar archivo temporal: {e}")
+
+    # 5. Mostrar Resultado Final Interpretado
+    print("\n--- Resultado Final Interpretado ---")
+    if isinstance(transcription_result_data, dict):
+        print("¡Respuesta interpretada como JSON exitosamente!")
+        lang = transcription_result_data.get('language', 'N/D')
+        full_text = transcription_result_data.get('text', '[Texto no encontrado]')
+        print(f"\nIdioma Detectado: {lang}")
+        print(f"\nTranscripción Completa:\n{'-'*25}\n{full_text}\n{'-'*25}")
+
+        # Mostrar segmentos si existen
+        segments = transcription_result_data.get('segments')
+        if segments and isinstance(segments, list):
+            print("\nSegmentos Detallados:")
+            for segment in segments:
+                start = segment.get('start', 0.0)
+                end = segment.get('end', 0.0)
+                text = segment.get('text', '')
+                print(f"  [{format_time(start)} --> {format_time(end)}] {text.strip()}")
+        else:
+            print("\n(No se encontraron detalles de segmentos en el JSON)")
+
+    elif isinstance(transcription_result_data, str):
+         print("Se recibió una respuesta de texto plano que no pudo ser interpretada como JSON:")
+         print(transcription_result_data)
+    else:
+        print("No se obtuvo resultado de la transcripción o hubo un error previo.")
+    print("----------------------------------")
+
+    overall_end_time = time.perf_counter()
+    elapsed_time = overall_end_time - overall_start_time
+    print(f"\n--- Tiempo Total Empleado: {elapsed_time:.2f} segundos ({format_time(elapsed_time)}) ---")
+    
--- a/x2.py
+++ b/x2.py
@ -0,0 +1,3 @@
+import moviepy.
+print(moviepy.editor.__file__)
+exit()