using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

namespace GTPCorrgir
{
    /// <summary>
    /// Sends OCR output to the OpenAI chat-completions endpoint for error correction,
    /// applies light Markdown formatting to the result and hands it to
    /// <c>ClipboardHelper.SetText</c>.
    /// </summary>
    /// <remarks>
    /// The API key is read from <c>ApiKeys.OpenAI</c> in the <c>appsettings.json</c> file
    /// located in the application's base directory.
    /// </remarks>
    public class OcrTextProcessor : IDisposable
    {
        private const string ChatCompletionsUrl = "https://api.openai.com/v1/chat/completions";
        private const string ModelName = "gpt-4";

        // Lines must be longer than this to be considered an ALL-CAPS heading.
        private const int MinHeadingLength = 20;

        private const string SystemPrompt =
            "You are an expert at correcting OCR errors. Fix common OCR mistakes like: confused characters (0/O, l/I, rn/m), broken words, and formatting issues. Preserve the original structure and meaning. Respond only with the corrected text in JSON format: {\"corrected_text\": \"your text here\"}";

        private readonly HttpClient _httpClient;
        private string _openAiApiKey;
        private bool _disposed;

        /// <summary>
        /// Creates the processor, loading the API key and configuring the HTTP client.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// Initialization failed (configuration file missing or unreadable, API key absent).
        /// </exception>
        public OcrTextProcessor()
        {
            _httpClient = new HttpClient();
            InitializeProcessor();
        }

        /// <summary>
        /// Runs every initialization step and wraps any failure in an
        /// <see cref="ApplicationException"/>.
        /// </summary>
        private void InitializeProcessor()
        {
            try
            {
                // Order matters: the HTTP client's Authorization header needs the key.
                LoadApiKey();
                InitializeHttpClient();
            }
            catch (Exception ex)
            {
                throw new ApplicationException("Error initializing OCR Text Processor", ex);
            }
        }

        /// <summary>
        /// Reads the OpenAI API key from <c>appsettings.json</c> in the application base directory.
        /// </summary>
        /// <exception cref="FileNotFoundException">The configuration file does not exist.</exception>
        /// <exception cref="ApplicationException">The key is missing or empty.</exception>
        private void LoadApiKey()
        {
            string configPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "appsettings.json");
            if (!File.Exists(configPath))
            {
                throw new FileNotFoundException("Configuration file (appsettings.json) not found.");
            }

            string jsonContent = File.ReadAllText(configPath);

            // Typed JObject access; SelectToken returns null (instead of throwing)
            // when any segment of the path is absent.
            var settings = JsonConvert.DeserializeObject<JObject>(jsonContent);
            _openAiApiKey = settings?.SelectToken("ApiKeys.OpenAI")?.ToString();

            if (string.IsNullOrEmpty(_openAiApiKey))
            {
                throw new ApplicationException("OpenAI API key is missing");
            }
        }

        /// <summary>
        /// Configures timeout and default headers once. Must be called after
        /// <see cref="LoadApiKey"/> because the Authorization header embeds the key.
        /// </summary>
        private void InitializeHttpClient()
        {
            _httpClient.Timeout = TimeSpan.FromSeconds(30);
            _httpClient.DefaultRequestHeaders.Clear();
            _httpClient.DefaultRequestHeaders.Add("Accept", "application/json");
            // Set once here rather than per request: mutating DefaultRequestHeaders while
            // requests may be in flight is not safe, and clearing them per call used to
            // drop the Accept header above.
            _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {_openAiApiKey}");
        }

        /// <summary>
        /// Corrects OCR errors in <paramref name="ocrText"/>, applies basic Markdown
        /// formatting and passes the result to <c>ClipboardHelper.SetText</c>.
        /// </summary>
        /// <param name="ocrText">Raw text produced by OCR.</param>
        /// <returns>The corrected and formatted text.</returns>
        /// <exception cref="ApplicationException">Any step of the pipeline failed.</exception>
        public async Task<string> ProcessOcrText(string ocrText)
        {
            try
            {
                // Correct OCR errors using the LLM.
                string correctedText = await CorrectOcrErrors(ocrText);

                // Apply minimal Markdown formatting.
                string formattedText = ApplyBasicFormatting(correctedText);

                // Copy to the clipboard using ClipboardHelper.
                await ClipboardHelper.SetText(formattedText);

                return formattedText;
            }
            catch (Exception ex)
            {
                throw new ApplicationException("Error processing OCR text", ex);
            }
        }

        /// <summary>
        /// Asks the chat-completions endpoint to fix OCR mistakes in <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Text to correct.</param>
        /// <returns>
        /// The <c>corrected_text</c> value from the model's JSON reply, or the raw reply
        /// when the model did not answer with the requested JSON shape.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// The request failed, returned a non-success status, or contained no message content.
        /// </exception>
        private async Task<string> CorrectOcrErrors(string text)
        {
            try
            {
                var requestData = new
                {
                    model = ModelName,
                    messages = new[]
                    {
                        new { role = "system", content = SystemPrompt },
                        new { role = "user", content = $"Please correct any OCR errors in this text: {text}" }
                    }
                };

                using var content = new StringContent(
                    JsonConvert.SerializeObject(requestData),
                    Encoding.UTF8,
                    "application/json");

                using var response = await _httpClient.PostAsync(ChatCompletionsUrl, content);
                var responseContent = await response.Content.ReadAsStringAsync();

                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException($"Error calling OpenAI API: {response.StatusCode} - {responseContent}");
                }

                // SelectToken yields null for an empty "choices" array or a missing field,
                // so a malformed reply produces a clear error instead of a binder/index failure.
                var responseData = JsonConvert.DeserializeObject<JObject>(responseContent);
                string modelOutput = responseData?.SelectToken("choices[0].message.content")?.ToString();

                if (string.IsNullOrWhiteSpace(modelOutput))
                {
                    throw new InvalidOperationException("OpenAI response did not contain any message content.");
                }

                return ExtractCorrectedText(modelOutput);
            }
            catch (Exception ex)
            {
                throw new ApplicationException("Error correcting OCR text", ex);
            }
        }

        /// <summary>
        /// Pulls the <c>corrected_text</c> value out of the model's reply.
        /// </summary>
        /// <param name="modelOutput">Non-empty message content returned by the model.</param>
        /// <returns>
        /// The <c>corrected_text</c> value when the reply is a JSON object containing it
        /// (optionally wrapped in a Markdown code fence); otherwise the reply unchanged.
        /// </returns>
        private static string ExtractCorrectedText(string modelOutput)
        {
            string candidate = modelOutput.Trim();

            // Models sometimes wrap JSON in a fenced block (```json ... ```); unwrap it.
            if (candidate.StartsWith("```", StringComparison.Ordinal))
            {
                int firstNewline = candidate.IndexOf('\n');
                int closingFence = candidate.LastIndexOf("```", StringComparison.Ordinal);
                if (firstNewline >= 0 && closingFence > firstNewline)
                {
                    candidate = candidate.Substring(firstNewline + 1, closingFence - firstNewline - 1).Trim();
                }
            }

            if (candidate.StartsWith("{", StringComparison.Ordinal))
            {
                try
                {
                    JToken corrected = JObject.Parse(candidate)["corrected_text"];
                    if (corrected != null && corrected.Type != JTokenType.Null)
                    {
                        return corrected.ToString();
                    }
                }
                catch (JsonException)
                {
                    // Not valid JSON after all: fall through and use the raw reply.
                }
            }

            return modelOutput;
        }

        /// <summary>
        /// Applies minimal Markdown formatting line by line: long ALL-CAPS lines become
        /// level-1 headings, lines starting with "•" or "*" become "-" list items, and
        /// every other line is emitted trimmed. Blank lines are preserved.
        /// </summary>
        /// <param name="text">Text to format.</param>
        /// <returns>The formatted text with leading/trailing whitespace removed.</returns>
        private string ApplyBasicFormatting(string text)
        {
            var result = new StringBuilder();

            // Trim() on each line also removes the '\r' left over from "\r\n" line endings.
            foreach (string rawLine in text.Split('\n'))
            {
                string currentLine = rawLine.Trim();

                if (currentLine.Length == 0)
                {
                    // Keep empty lines as paragraph separators.
                    result.AppendLine();
                }
                else if (IsAllCapsHeading(currentLine))
                {
                    result.AppendLine($"# {currentLine}");
                }
                else if (currentLine.StartsWith("•", StringComparison.Ordinal)
                      || currentLine.StartsWith("*", StringComparison.Ordinal))
                {
                    result.AppendLine($"- {currentLine.Substring(1).Trim()}");
                }
                else
                {
                    // Normal text. Numbered list items ("1." / "1)") need no rewriting
                    // and are passed through here as well.
                    result.AppendLine(currentLine);
                }
            }

            return result.ToString().Trim();
        }

        /// <summary>
        /// Returns true when <paramref name="line"/> is longer than
        /// <see cref="MinHeadingLength"/>, contains at least one letter and has no
        /// lower-case characters.
        /// </summary>
        private static bool IsAllCapsHeading(string line)
        {
            // The letter check prevents long digit/punctuation-only lines (which trivially
            // equal their upper-case form) from being promoted to headings.
            return line.Length > MinHeadingLength
                && line.Any(char.IsLetter)
                && line == line.ToUpperInvariant();
        }

        /// <summary>
        /// Releases the underlying <see cref="HttpClient"/>.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Releases resources held by this instance.
        /// </summary>
        /// <param name="disposing">
        /// True when called from <see cref="Dispose()"/>; false when called from the finalizer,
        /// in which case managed objects are not touched.
        /// </param>
        protected virtual void Dispose(bool disposing)
        {
            if (_disposed)
            {
                return;
            }

            if (disposing)
            {
                _httpClient?.Dispose();
            }

            _disposed = true;
        }

        ~OcrTextProcessor()
        {
            Dispose(false);
        }
    }
}