md-app/lib/services/mimir_service.dart

344 lines
10 KiB
Dart

import 'package:easy_localization/easy_localization.dart';
import 'package:flutter_mimir/flutter_mimir.dart';
import 'package:search_engine/database.dart';
import 'package:flutter/foundation.dart';
/// Signature of the callback used to report progress of a long-running task.
///
/// [message] is a human-readable status text and [progress] is the completed
/// fraction; `syncWithDatabase` reports values from `0.0` up to `1.0`.
typedef ProgressCallback = void Function(String message, double progress);
/// Singleton service that wraps the Mimir full-text index named `messages`.
///
/// Every public method lazily calls [initialize], so callers do not have to
/// initialize the service explicitly before using it.
class MimirService {
  static final MimirService _instance = MimirService._internal();

  /// Returns the shared [MimirService] instance.
  factory MimirService() => _instance;

  MimirService._internal();

  /// Cleaned content up to this many characters is returned whole instead of
  /// being cut down to an excerpt.
  static const int _fullContentThreshold = 300;

  /// Number of characters kept on each side of a match inside a snippet.
  static const int _snippetContext = 100;

  /// Marker added to a snippet wherever text was cut off.
  static const String _ellipsis = '...';

  /// Matches a single HTML tag.
  static final RegExp _htmlTag = RegExp(r'<[^>]*>');

  /// Matches one character reference: decimal (`&#123;`), hexadecimal
  /// (`&#x7B;`) or named (`&amp;`). Group 1 is the text between `&` and `;`.
  static final RegExp _htmlEntity =
      RegExp(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z0-9]+);');

  /// Matches a run of whitespace.
  static final RegExp _whitespaceRun = RegExp(r'\s+');

  /// Named entities that are decoded; any other named entity is removed.
  static const Map<String, String> _namedEntities = {
    'nbsp': ' ',
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'quot': '"',
    'apos': "'",
    'cent': '¢',
    'pound': '£',
    'yen': '¥',
    // Written as an escape: the literal euro sign had been lost from this
    // table, which made `&euro;` decode to an empty string.
    'euro': '\u20AC',
    'copy': '©',
    'reg': '®',
    'aacute': 'á',
    'eacute': 'é',
    'iacute': 'í',
    'oacute': 'ó',
    'uacute': 'ú',
    'ntilde': 'ñ',
    'Aacute': 'Á',
    'Eacute': 'É',
    'Iacute': 'Í',
    'Oacute': 'Ó',
    'Uacute': 'Ú',
    'Ntilde': 'Ñ',
  };

  late MimirIndex _index;
  bool _isInitialized = false;

  /// Opens the `messages` index of the default Mimir instance.
  ///
  /// Does nothing when the service has already been initialized.
  Future<void> initialize() async {
    if (_isInitialized) return;
    final instance = await Mimir.defaultInstance;
    _index = instance.getIndex('messages');
    _isInitialized = true;
  }

  /// Returns the number of documents in the index.
  ///
  /// The count is obtained by loading every document and measuring the list.
  Future<int> getDocumentCount() async {
    if (!_isInitialized) await initialize();
    final documents = await _index.getAllDocuments();
    return documents.length;
  }

  /// Returns every document stored in the index.
  Future<List<Map<String, dynamic>>> getAllDocuments() async {
    if (!_isInitialized) await initialize();
    return await _index.getAllDocuments();
  }

  /// Adds a single document with the fields `id`, `languages_code` and
  /// `content` to the index.
  Future<void> addDocument(
      String id, String languagesCode, String content) async {
    if (!_isInitialized) await initialize();
    await _index.addDocument({
      'id': id,
      'languages_code': languagesCode,
      'content': content,
    });
  }

  /// Adds all [documents] to the index in one call.
  Future<void> addDocuments(List<Map<String, String>> documents) async {
    if (!_isInitialized) await initialize();
    await _index.addDocuments(documents);
  }

  /// Searches the index for [query] among documents whose `languages_code`
  /// equals [languagesCode].
  ///
  /// The index is queried without pagination; [offset] and [limit] are then
  /// applied in memory to the returned list. Out-of-range values (including
  /// a negative [limit]) yield a shorter or empty page instead of throwing.
  ///
  /// Returns a map with:
  /// * `results`: the requested page, one `Map<String, String>` per document
  ///   with the keys `id`, `content` (a cleaned snippet), `position` and
  ///   `length` (the latter two are integers rendered as strings);
  /// * `total`: the number of matches before pagination;
  /// * `allResultIds`: the ids of all matches before pagination.
  Future<Map<String, dynamic>> search(String query, String languagesCode,
      {int limit = 1000, int offset = 0}) async {
    if (!_isInitialized) await initialize();

    final allResults = await _index.search(
      query: query,
      filter: Mimir.where('languages_code', isEqualTo: languagesCode),
    );
    if (kDebugMode && allResults.isNotEmpty) {
      print('🔍 Mimir encontró ${allResults.length} resultados totales');
    }

    // Clamp `end` to at least `start` so that a negative limit produces an
    // empty page rather than a RangeError from sublist.
    final int start = offset.clamp(0, allResults.length);
    final int end = (offset + limit).clamp(start, allResults.length);
    final page = allResults.sublist(start, end);

    final transformedResults = page.map<Map<String, String>>((doc) {
      // A document without a string `content` is treated as empty text.
      final rawContent = doc['content'];
      final content = rawContent is String ? rawContent : '';
      final snippetInfo = _extractSnippetWithPosition(content, query);
      return {
        'id': doc['id'] as String,
        'content': snippetInfo['snippet'] as String,
        'position': snippetInfo['position'].toString(),
        'length': snippetInfo['length'].toString(),
      };
    }).toList();

    return {
      'results': transformedResults,
      'total': allResults.length,
      'allResultIds': allResults.map((doc) => doc['id'] as String).toList(),
    };
  }

  /// Builds a display snippet of [content] for [query].
  ///
  /// [content] is first stripped of HTML. The returned map has the keys
  /// `snippet` (text), `position` (offset of the match inside `snippet`) and
  /// `length` (length of the match):
  ///
  /// * cleaned content of at most 300 characters is returned whole with
  ///   `position` 0 and `length` equal to the content length;
  /// * otherwise the whole query is looked up case-insensitively, then each
  ///   query word longer than two characters, and the first hit is returned
  ///   with up to 100 characters of context on each side;
  /// * if nothing matches, or the query is blank, the first 300 characters
  ///   are returned with `position` 0 and `length` 0.
  Map<String, dynamic> _extractSnippetWithPosition(
      String content, String query) {
    final cleanContent = _cleanHtmlContent(content);

    // NOTE(review): for short content `length` covers the entire text rather
    // than the matched term; kept as-is because callers may rely on it.
    if (cleanContent.length <= _fullContentThreshold) {
      return {
        'snippet': cleanContent,
        'position': 0,
        'length': cleanContent.length,
      };
    }

    // The content has its whitespace collapsed, so the query gets the same
    // treatment; otherwise a phrase typed with a tab or a double space could
    // never match.
    final normalizedQuery = query.trim().replaceAll(_whitespaceRun, ' ');

    // A blank query has nothing to locate; without this guard indexOf('')
    // would report a bogus match at offset 0.
    if (normalizedQuery.isNotEmpty) {
      // NOTE(review): offsets are found in the lower-cased text and applied to
      // the original; this assumes lower-casing does not change string length.
      final lowerContent = cleanContent.toLowerCase();

      final phraseAt = lowerContent.indexOf(normalizedQuery.toLowerCase());
      if (phraseAt != -1) {
        return _snippetAround(cleanContent, phraseAt, normalizedQuery.length);
      }

      for (final word in normalizedQuery.split(' ')) {
        if (word.length <= 2) continue;
        final wordAt = lowerContent.indexOf(word.toLowerCase());
        if (wordAt != -1) {
          return _snippetAround(cleanContent, wordAt, word.length);
        }
      }
    }

    return {
      'snippet':
          '${cleanContent.substring(0, _fullContentThreshold)}$_ellipsis',
      'position': 0,
      'length': 0,
    };
  }

  /// Cuts a snippet out of [text] around the match that starts at
  /// [matchStart] and spans [matchLength] characters.
  ///
  /// An ellipsis is added on each side where text was cut, and the returned
  /// `position` is shifted to account for a leading ellipsis.
  Map<String, dynamic> _snippetAround(
      String text, int matchStart, int matchLength) {
    final int start = (matchStart - _snippetContext).clamp(0, text.length);
    final int end =
        (matchStart + matchLength + _snippetContext).clamp(0, text.length);
    final cutAtStart = start > 0;
    final cutAtEnd = end < text.length;

    final snippet = '${cutAtStart ? _ellipsis : ''}'
        '${text.substring(start, end)}'
        '${cutAtEnd ? _ellipsis : ''}';

    return {
      'snippet': snippet,
      'position': matchStart - start + (cutAtStart ? _ellipsis.length : 0),
      'length': matchLength,
    };
  }

  /// Returns only the snippet text of [_extractSnippetWithPosition] (legacy).
  // ignore: unused_element
  String _extractSnippet(String content, String query) {
    return _extractSnippetWithPosition(content, query)['snippet'] as String;
  }

  /// Converts [html] to plain text.
  ///
  /// Tags are replaced by a space, character references are decoded, and
  /// whitespace runs are collapsed to a single space with the ends trimmed.
  /// Named entities that are not in [_namedEntities] and numeric references
  /// that do not denote a valid character are removed.
  String _cleanHtmlContent(String html) {
    if (html.isEmpty) return '';

    final withoutTags = html.replaceAll(_htmlTag, ' ');

    // Decode every reference in a single pass so that already-decoded text is
    // never decoded again: `&amp;lt;` must become `&lt;`, not `<`.
    final decoded = withoutTags.replaceAllMapped(_htmlEntity, (match) {
      final reference = match.group(1)!;
      if (reference.startsWith('#')) {
        return _decodeNumericEntity(reference.substring(1));
      }
      return _namedEntities[reference] ?? '';
    });

    return decoded.replaceAll(_whitespaceRun, ' ').trim();
  }

  /// Decodes the body of a numeric character reference, e.g. `123` or `x7B`.
  ///
  /// Returns an empty string when [reference] does not denote a valid Unicode
  /// scalar value.
  static String _decodeNumericEntity(String reference) {
    final isHex = reference.startsWith('x') || reference.startsWith('X');
    final code = isHex
        ? int.tryParse(reference.substring(1), radix: 16)
        : int.tryParse(reference);
    if (code == null || code < 0 || code > 0x10FFFF) return '';
    // Surrogate code points are not characters on their own.
    if (code >= 0xD800 && code <= 0xDFFF) return '';
    return String.fromCharCode(code);
  }

  /// Removes the document with the given [id] from the index.
  Future<void> deleteDocument(String id) async {
    if (!_isInitialized) await initialize();
    await _index.deleteDocument(id);
  }

  /// Removes every document from the index.
  Future<void> clearIndex() async {
    if (!_isInitialized) await initialize();
    await _index.deleteAllDocuments();
  }

  /// Indexes every database message that is missing from the index.
  ///
  /// Messages whose id is already indexed, or whose body is null or empty,
  /// are skipped. New documents are added in batches of 50 and [onProgress]
  /// is invoked before the first batch, after each batch, and once more on
  /// completion. Nothing is reported when there is nothing to add. Documents
  /// that exist only in the index are left untouched.
  Future<void> syncWithDatabase({ProgressCallback? onProgress}) async {
    if (!_isInitialized) await initialize();
    final database = AppDatabase();
    try {
      final mimirDocuments = await getAllDocuments();
      final mimirCount = mimirDocuments.length;
      final mimirIds = <String>{
        for (final doc in mimirDocuments) doc['id'] as String,
      };

      final messages = await database.getAllMessages();
      final dbCount = messages.length;
      if (kDebugMode) {
        print('📊 Documentos en Mimir: $mimirCount');
        print('📊 Documentos en Base de Datos: $dbCount');
      }

      // Messages that are in the database but not yet in the index.
      final documentsToAdd = messages.where((draft) {
        final body = draft.body;
        return !mimirIds.contains(draft.id) &&
            body != null &&
            body.isNotEmpty;
      }).toList();

      if (documentsToAdd.isEmpty) {
        if (kDebugMode) {
          print('✅ Mimir y Base de Datos están sincronizados');
        }
        return;
      }

      if (kDebugMode) {
        print('🔄 Documentos a agregar a Mimir: ${documentsToAdd.length}');
      }
      onProgress?.call('updating_search_index'.tr(), 0.0);

      final documents = documentsToAdd.map((draft) {
        return {
          'id': draft.id,
          'languages_code': draft.languagesCode,
          'content': draft.body!,
        };
      }).toList();
      if (kDebugMode) {
        print('📝 Documentos válidos para indexar: ${documents.length}');
      }

      const batchSize = 50;
      var indexedCount = 0;
      for (var i = 0; i < documents.length; i += batchSize) {
        final end = (i + batchSize < documents.length)
            ? i + batchSize
            : documents.length;
        final batch = documents.sublist(i, end);
        await addDocuments(batch);
        indexedCount += batch.length;

        final progress = indexedCount / documents.length;
        onProgress?.call('updating_search_index'.tr(), progress);
        if (kDebugMode) {
          print('✓ Progreso de indexación: $indexedCount/${documents.length}');
        }
      }

      if (kDebugMode) {
        final finalCount = await getDocumentCount();
        print('✅ Indexación completada. Documentos en Mimir: $finalCount');
      }
      onProgress?.call('search_index_updated'.tr(), 1.0);
    } finally {
      // Awaited so that a failure while closing is reported to the caller
      // instead of being dropped (presumably close() returns a Future —
      // confirm against AppDatabase).
      await database.close();
    }
  }
}