// GTPCorrgir/OcrTextProcessor.cs
//
// 199 lines
// 6.5 KiB
// C#

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
namespace GTPCorrgir
{
    /// <summary>
    /// Sends raw OCR output to the OpenAI chat-completions endpoint for error
    /// correction, applies light Markdown formatting to the corrected text and
    /// passes the result to <c>ClipboardHelper.SetText</c>.
    /// </summary>
    public class OcrTextProcessor : IDisposable
    {
        private const string ConfigFileName = "appsettings.json";
        private const string ChatCompletionsUrl = "https://api.openai.com/v1/chat/completions";
        private const string ModelName = "gpt-4";

        /// <summary>An all-caps line must be longer than this to be emitted as a heading.</summary>
        private const int HeadingLengthThreshold = 20;

        private const string SystemPrompt =
            "You are an expert at correcting OCR errors. Fix common OCR mistakes like: confused characters (0/O, l/I, rn/m), broken words, and formatting issues. Preserve the original structure and meaning. Respond only with the corrected text in JSON format: {\"corrected_text\": \"your text here\"}";

        private readonly HttpClient _httpClient;
        private string _openAiApiKey;
        private bool _disposed;

        /// <summary>
        /// Creates the processor, loading the OpenAI API key from
        /// <c>appsettings.json</c> in the application base directory.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// The configuration could not be loaded or the HTTP client could not be configured.
        /// </exception>
        public OcrTextProcessor()
        {
            _httpClient = new HttpClient();
            InitializeProcessor();
        }

        /// <summary>
        /// Loads the API key and configures the HTTP client, wrapping any failure
        /// in an <see cref="ApplicationException"/>.
        /// </summary>
        private void InitializeProcessor()
        {
            try
            {
                // Order matters: the Authorization header needs the key.
                LoadApiKey();
                InitializeHttpClient();
            }
            catch (Exception ex)
            {
                // The constructor is about to fail, so no caller will ever be able
                // to call Dispose(); release the client here to avoid leaking it.
                _httpClient.Dispose();
                throw new ApplicationException("Error initializing OCR Text Processor", ex);
            }
        }

        /// <summary>
        /// Reads <c>ApiKeys.OpenAI</c> from the JSON configuration file.
        /// </summary>
        /// <exception cref="FileNotFoundException">The configuration file does not exist.</exception>
        /// <exception cref="ApplicationException">The file contains no OpenAI key.</exception>
        private void LoadApiKey()
        {
            string configPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, ConfigFileName);
            if (!File.Exists(configPath))
            {
                throw new FileNotFoundException("Configuration file (appsettings.json) not found.", configPath);
            }

            string jsonContent = File.ReadAllText(configPath);
            var settings = JsonConvert.DeserializeObject<ApiSettings>(jsonContent);

            _openAiApiKey = settings?.ApiKeys?.OpenAI;
            if (string.IsNullOrEmpty(_openAiApiKey))
            {
                throw new ApplicationException("OpenAI API key is missing");
            }
        }

        /// <summary>
        /// Configures timeout and default headers once. Both the <c>Accept</c> and
        /// <c>Authorization</c> headers are set here so that individual requests
        /// never have to mutate the shared default headers.
        /// </summary>
        private void InitializeHttpClient()
        {
            _httpClient.Timeout = TimeSpan.FromSeconds(30);
            _httpClient.DefaultRequestHeaders.Clear();
            _httpClient.DefaultRequestHeaders.Add("Accept", "application/json");
            _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {_openAiApiKey}");
        }

        /// <summary>
        /// Corrects OCR errors in <paramref name="ocrText"/>, applies basic Markdown
        /// formatting and passes the result to <c>ClipboardHelper.SetText</c>.
        /// </summary>
        /// <param name="ocrText">Raw text produced by OCR.</param>
        /// <returns>
        /// The corrected and formatted text, or an empty string when
        /// <paramref name="ocrText"/> is null, empty or whitespace (in that case
        /// neither the API nor the clipboard helper is called).
        /// </returns>
        /// <exception cref="ApplicationException">Any failure while correcting, formatting or copying.</exception>
        public async Task<string> ProcessOcrText(string ocrText)
        {
            // Nothing to correct: avoid a pointless remote call.
            if (string.IsNullOrWhiteSpace(ocrText))
            {
                return string.Empty;
            }

            try
            {
                // Correct OCR errors using the LLM.
                string correctedText = await CorrectOcrErrors(ocrText);

                // Apply minimal Markdown formatting.
                string formattedText = ApplyBasicFormatting(correctedText);

                // Copy to the clipboard through ClipboardHelper.
                await ClipboardHelper.SetText(formattedText);

                return formattedText;
            }
            catch (Exception ex)
            {
                throw new ApplicationException("Error processing OCR text", ex);
            }
        }

        /// <summary>
        /// Posts <paramref name="text"/> to the chat-completions endpoint and returns
        /// the <c>corrected_text</c> value from the model's JSON reply.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// The request failed, returned a non-success status, or the reply did not
        /// have the expected shape.
        /// </exception>
        private async Task<string> CorrectOcrErrors(string text)
        {
            try
            {
                var requestData = new
                {
                    model = ModelName,
                    messages = new[]
                    {
                        new { role = "system", content = SystemPrompt },
                        new { role = "user", content = $"Please correct any OCR errors in this text: {text}" }
                    }
                };

                using var requestBody = new StringContent(
                    JsonConvert.SerializeObject(requestData),
                    Encoding.UTF8,
                    "application/json");

                using var response = await _httpClient.PostAsync(ChatCompletionsUrl, requestBody);
                var responseContent = await response.Content.ReadAsStringAsync();

                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException($"Error calling OpenAI API: {response.StatusCode} - {responseContent}");
                }

                return ExtractCorrectedText(responseContent);
            }
            catch (Exception ex)
            {
                throw new ApplicationException("Error correcting OCR text", ex);
            }
        }

        /// <summary>
        /// Pulls <c>choices[0].message.content</c> out of the API response and then
        /// the <c>corrected_text</c> value out of that (JSON) message.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The response has no message content, or the message has no
        /// <c>corrected_text</c> field.
        /// </exception>
        private static string ExtractCorrectedText(string responseContent)
        {
            var envelope = Newtonsoft.Json.Linq.JObject.Parse(responseContent);

            // SelectToken returns null instead of throwing when any path segment
            // (including an empty "choices" array) is missing.
            string reply = envelope.SelectToken("choices[0].message.content")?.ToString();
            if (string.IsNullOrWhiteSpace(reply))
            {
                throw new InvalidOperationException("OpenAI response did not contain any message content.");
            }

            // The system prompt asks the model to answer with {"corrected_text": "..."}.
            var payload = Newtonsoft.Json.Linq.JObject.Parse(reply);
            string corrected = payload["corrected_text"]?.ToString();
            if (corrected == null)
            {
                throw new InvalidOperationException("OpenAI reply did not contain a 'corrected_text' field.");
            }

            return corrected;
        }

        /// <summary>
        /// Applies line-based Markdown formatting: long all-caps lines become
        /// <c>#</c> headings, bullet lines are normalised to <c>- item</c>, and
        /// every other line is emitted trimmed. Blank lines are preserved.
        /// </summary>
        /// <returns>The formatted text, trimmed; empty when <paramref name="text"/> is null or empty.</returns>
        private static string ApplyBasicFormatting(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            var result = new StringBuilder();
            foreach (string rawLine in text.Split('\n'))
            {
                // Trim also removes the '\r' left behind by "\r\n" line endings.
                string line = rawLine.Trim();

                if (line.Length == 0)
                {
                    result.AppendLine();
                }
                else if (IsHeading(line))
                {
                    result.Append("# ").AppendLine(line);
                }
                else if (TryGetBulletText(line, out string item))
                {
                    result.Append("- ").AppendLine(item);
                }
                else
                {
                    // Numbered list items and ordinary text are both kept as they are.
                    result.AppendLine(line);
                }
            }

            return result.ToString().Trim();
        }

        /// <summary>
        /// A heading is a line longer than <see cref="HeadingLengthThreshold"/> that
        /// contains at least one letter and no character that upper-casing would change.
        /// The letter requirement keeps digit-only or punctuation-only lines from
        /// being promoted to headings.
        /// </summary>
        private static bool IsHeading(string line)
        {
            return line.Length > HeadingLengthThreshold
                && line.Any(char.IsLetter)
                && line == line.ToUpperInvariant();
        }

        /// <summary>
        /// Recognises a bullet line and returns its text without the marker.
        /// <c>•</c> always marks a bullet; <c>*</c> only does so when it is followed
        /// by whitespace (or is the whole line), so Markdown emphasis such as
        /// <c>**bold**</c> is left untouched.
        /// </summary>
        /// <param name="line">A trimmed, non-empty line.</param>
        /// <param name="item">The bullet text when the method returns true; otherwise null.</param>
        private static bool TryGetBulletText(string line, out string item)
        {
            char marker = line[0];
            bool isBullet = marker == '•'
                || (marker == '*' && (line.Length == 1 || char.IsWhiteSpace(line[1])));

            item = isBullet ? line.Substring(1).Trim() : null;
            return isBullet;
        }

        /// <summary>Releases the underlying <see cref="HttpClient"/>.</summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Disposes managed resources when <paramref name="disposing"/> is true.
        /// Safe to call more than once.
        /// </summary>
        protected virtual void Dispose(bool disposing)
        {
            if (_disposed)
            {
                return;
            }

            if (disposing)
            {
                _httpClient?.Dispose();
            }

            _disposed = true;
        }

        // Kept so that derived types overriding Dispose(bool) still get the
        // Dispose(false) call they may depend on.
        ~OcrTextProcessor()
        {
            Dispose(false);
        }
    }
}