using GTPCorrgir;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Newtonsoft.Json;

namespace CtrEditor.Services
{
    /// <summary>
    /// Wrapper around a single <see cref="gtpask"/> instance that submits
    /// OCR-correction prompts, either for one text or for a batch of
    /// source/target text pairs.
    /// </summary>
    /// <remarks>
    /// Each call writes its input text and system prompt onto the one shared
    /// <see cref="gtpask"/> instance before awaiting it, so overlapping calls on
    /// the same <see cref="LLMService"/> would overwrite each other's values.
    /// NOTE(review): whether <see cref="gtpask"/> itself tolerates concurrent use
    /// is not visible from this file - confirm before sharing an instance.
    /// </remarks>
    public class LLMService : IDisposable
    {
        private readonly gtpask _llmProcessor;

        /// <summary>
        /// Creates the service together with the <see cref="gtpask"/> instance it owns.
        /// </summary>
        public LLMService()
        {
            _llmProcessor = new gtpask();
        }

        /// <summary>
        /// Sends <paramref name="text"/> to the processor with an OCR-correction
        /// system prompt and returns the processor's corrected text.
        /// </summary>
        /// <param name="text">Text to correct.</param>
        /// <param name="useMarkdown">
        /// Currently unused by this method; kept so existing callers keep compiling.
        /// </param>
        /// <returns>The value of <c>TextoCorregido</c> after the processor has run.</returns>
        /// <exception cref="InvalidOperationException">
        /// Any failure while running the processor; the original exception is
        /// preserved as <see cref="Exception.InnerException"/>.
        /// </exception>
        public async Task<string> ProcessText(string text, bool useMarkdown = false)
        {
            try
            {
                _llmProcessor.TextoACorregir = text;

                // The system prompt is assigned on the shared processor before every run.
                _llmProcessor.TextodeSistema = "You are an OCR correction specialist. Analyze and correct any obvious OCR errors." +
                    "\nPay special attention to:" +
                    "\n- Incorrectly joined words (missing spaces)" +
                    "\n- Wrong character recognition (0 vs O, 1 vs I, etc.)" +
                    "\n- Extra or missing characters" +
                    "\nReturn only the corrected text without explanations.";

                await _llmProcessor.CorregirTexto();
                return _llmProcessor.TextoCorregido;
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException($"Error processing text with LLM: {ex.Message}", ex);
            }
        }

        /// <summary>
        /// Sends a batch of source/target text pairs to the processor as a JSON
        /// array of two-element arrays and parses the corrected pairs out of the
        /// fenced <c>```json</c> block in the response.
        /// </summary>
        /// <param name="textPairs">Pairs to correct; must not be <c>null</c>.</param>
        /// <param name="sourceLanguage">Language name inserted into the prompt for the first text of each pair.</param>
        /// <param name="targetLanguage">Language name inserted into the prompt for the second text of each pair.</param>
        /// <returns>
        /// The corrected pairs with trailing line feeds removed from both texts.
        /// An empty input yields an empty list without calling the processor.
        /// The number of returned pairs is whatever the response contained; it is
        /// not checked against the number of input pairs.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="textPairs"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">
        /// The processor failed, no JSON block could be extracted, or the JSON did
        /// not describe a list of string pairs; the original exception is preserved
        /// as <see cref="Exception.InnerException"/>.
        /// </exception>
        public async Task<List<(string Source, string Target)>> ProcessTextBatch(
            List<(string Source, string Target)> textPairs,
            string sourceLanguage = "English",
            string targetLanguage = "English")
        {
            if (textPairs == null)
            {
                throw new ArgumentNullException(nameof(textPairs));
            }

            // Nothing to correct: skip the round trip to the processor.
            if (textPairs.Count == 0)
            {
                return new List<(string Source, string Target)>();
            }

            try
            {
                var textPairsJson = JsonConvert.SerializeObject(
                    textPairs.Select(p => new[] { p.Source, p.Target }).ToList()
                );

                _llmProcessor.TextoACorregir = textPairsJson;

                // Verbatim literal: its whitespace and embedded line break are part of
                // the prompt text, so it is intentionally left unformatted.
                _llmProcessor.TextodeSistema = $@"You are an OCR correction specialist working with {sourceLanguage} and {targetLanguage} texts. For each pair, the first text is in {sourceLanguage} and the second text is in {targetLanguage}. Pay special attention to: - Language-specific characters and accents for both {sourceLanguage} and {targetLanguage} - Incorrectly joined words (missing spaces) - Wrong character recognition (0 vs O, 1 vs I, etc.) - Extra or missing characters Return the corrected versions in JSON format as a list of pairs. 
Input: [[""source text"", ""target text""]] Expected output format: ```json[[""corrected source"", ""corrected target""]]```";

                await _llmProcessor.CorregirTexto();

                // The prompt asks for the answer inside a ```json fenced block.
                string jsonContent = ExtractJsonFromMarkdown(_llmProcessor.TextoCorregido);
                if (string.IsNullOrEmpty(jsonContent))
                {
                    throw new InvalidOperationException("Could not extract JSON content from LLM response");
                }

                var result = JsonConvert.DeserializeObject<List<string[]>>(jsonContent);
                if (result == null)
                {
                    // e.g. the fenced block contained the JSON literal null.
                    throw new InvalidOperationException("LLM response JSON did not contain a list of text pairs");
                }

                var corrected = new List<(string Source, string Target)>(result.Count);
                foreach (var pair in result)
                {
                    // Report a malformed entry explicitly instead of failing with a
                    // NullReferenceException / IndexOutOfRangeException further down.
                    if (pair == null || pair.Length < 2 || pair[0] == null || pair[1] == null)
                    {
                        throw new InvalidOperationException("LLM response JSON contained a malformed text pair");
                    }

                    corrected.Add((pair[0].TrimEnd('\n'), pair[1].TrimEnd('\n')));
                }

                return corrected;
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException($"Error processing text batch with LLM: {ex.Message}", ex);
            }
        }

        /// <summary>
        /// Returns the trimmed text between the first <c>```json</c> marker and the
        /// next <c>```</c> marker in <paramref name="markdownText"/>.
        /// </summary>
        /// <param name="markdownText">Raw response text; may be <c>null</c> or empty.</param>
        /// <returns>
        /// The trimmed fenced content, or <c>null</c> when the input is null/empty or
        /// either marker is missing.
        /// </returns>
        private static string ExtractJsonFromMarkdown(string markdownText)
        {
            const string jsonStart = "```json";
            const string codeBlockEnd = "```";

            if (string.IsNullOrEmpty(markdownText))
            {
                return null;
            }

            // Ordinal search: the markers are literal syntax, not linguistic text.
            var startIndex = markdownText.IndexOf(jsonStart, StringComparison.Ordinal);
            if (startIndex == -1)
            {
                return null;
            }

            startIndex += jsonStart.Length;

            var endIndex = markdownText.IndexOf(codeBlockEnd, startIndex, StringComparison.Ordinal);
            if (endIndex == -1)
            {
                return null;
            }

            return markdownText.Substring(startIndex, endIndex - startIndex).Trim();
        }

        /// <summary>
        /// Disposes the owned <see cref="gtpask"/> instance.
        /// </summary>
        public void Dispose()
        {
            _llmProcessor?.Dispose();
        }
    }
}