344 lines
10 KiB
Dart
344 lines
10 KiB
Dart
import 'package:easy_localization/easy_localization.dart';
|
|
import 'package:flutter_mimir/flutter_mimir.dart';
|
|
import 'package:search_engine/database.dart';
|
|
import 'package:flutter/foundation.dart';
|
|
|
|
/// Signature of the callback used to report progress of a long operation.
///
/// [message] is a status text meant for display and [progress] is the
/// completed fraction; `syncWithDatabase` reports values from `0.0` to `1.0`.
typedef ProgressCallback = void Function(String message, double progress);
|
|
|
|
class MimirService {
|
|
/// Private constructor; instances are only created for [_instance].
MimirService._internal();

/// The single shared service object handed out by the factory constructor.
static final MimirService _instance = MimirService._internal();

/// Returns the shared [_instance] instead of creating a new object.
factory MimirService() {
  return _instance;
}

/// Handle to the search index; assigned by `initialize` before first use.
late MimirIndex _index;

/// Whether `initialize` has already assigned [_index].
var _isInitialized = false;
|
|
|
|
/// Opens the `messages` index on the default Mimir instance.
///
/// Returns immediately when the service was already initialized.
Future<void> initialize() async {
  if (!_isInitialized) {
    final mimir = await Mimir.defaultInstance;
    _index = mimir.getIndex('messages');
    _isInitialized = true;
  }
}
|
|
|
|
/// Returns the number of documents currently stored in the index.
///
/// The count is obtained by loading every document and measuring the
/// resulting list, so the cost grows with the size of the index.
Future<int> getDocumentCount() async {
  if (!_isInitialized) {
    await initialize();
  }
  final documents = await _index.getAllDocuments();
  return documents.length;
}
|
|
|
|
/// Returns every document stored in the index.
///
/// Initializes the service first when that has not happened yet.
Future<List<Map<String, dynamic>>> getAllDocuments() async {
  if (!_isInitialized) {
    await initialize();
  }
  final documents = await _index.getAllDocuments();
  return documents;
}
|
|
|
|
/// Adds a single document to the index.
///
/// The document is stored with the keys `id`, `languages_code` and
/// `content`, taken from [id], [languagesCode] and [content].
Future<void> addDocument(
    String id, String languagesCode, String content) async {
  if (!_isInitialized) {
    await initialize();
  }

  final Map<String, dynamic> document = {
    'id': id,
    'languages_code': languagesCode,
    'content': content,
  };
  await _index.addDocument(document);
}
|
|
|
|
/// Adds all [documents] to the index in one call.
///
/// Initializes the service first when that has not happened yet.
Future<void> addDocuments(List<Map<String, String>> documents) async {
  if (!_isInitialized) {
    await initialize();
  }
  await _index.addDocuments(documents);
}
|
|
|
|
/// Searches the index for [query] among documents whose `languages_code`
/// equals [languagesCode].
///
/// The index is queried without pagination; [offset] and [limit] are then
/// applied to the complete hit list. Out-of-range values are clamped, so a
/// negative [offset] starts at the first hit and a non-positive [limit]
/// produces an empty page instead of throwing.
///
/// Returns a map with three entries:
/// * `results`: the requested page, one `Map<String, String>` per hit with
///   the keys `id`, `content` (a plain-text snippet), `position` and
///   `length` (offset and size of the match inside the snippet, as strings);
/// * `total`: the number of hits before pagination;
/// * `allResultIds`: the ids of all hits, in result order.
Future<Map<String, dynamic>> search(String query, String languagesCode,
    {int limit = 1000, int offset = 0}) async {
  if (!_isInitialized) await initialize();

  // Every hit is fetched so that the total count and the complete id list
  // can be reported alongside the requested page.
  final allResults = await _index.search(
    query: query,
    filter: Mimir.where('languages_code', isEqualTo: languagesCode),
  );

  if (kDebugMode && allResults.isNotEmpty) {
    print('🔍 Mimir encontró ${allResults.length} resultados totales');
  }

  final total = allResults.length;
  final int start = offset.clamp(0, total);
  // The lower bound is `start`, not 0: a negative limit would otherwise give
  // an inverted range and make `sublist` throw a RangeError.
  final int end = (offset + limit).clamp(start, total);
  final page = allResults.sublist(start, end);

  final transformedResults = page.map((doc) {
    // A document indexed without a `content` field yields an empty snippet
    // instead of a failed cast.
    final content = (doc['content'] as String?) ?? '';
    final snippetInfo = _extractSnippetWithPosition(content, query);

    return <String, String>{
      'id': doc['id'] as String,
      'content': snippetInfo['snippet'] as String,
      'position': snippetInfo['position'].toString(),
      'length': snippetInfo['length'].toString(),
    };
  }).toList();

  return {
    'results': transformedResults,
    'total': total,
    'allResultIds': allResults.map((doc) => doc['id'] as String).toList(),
  };
}
|
|
|
|
// Helper method to extract a relevant snippet from content with position information
|
|
/// Builds a plain-text snippet of [content] around the first occurrence of
/// [query] and reports where the match sits inside that snippet.
///
/// [content] is first stripped of HTML. The whole query (trimmed, with
/// whitespace runs collapsed to one space) is looked up case-insensitively;
/// when it is not found, each query word longer than two characters is tried
/// in order.
///
/// Returns a map with:
/// * `snippet`: the text to display. Content of at most 300 characters is
///   returned whole; longer content is cut to 100 characters on each side of
///   the match, with `...` marking every cut; without a match the first 300
///   characters followed by `...` are returned;
/// * `position`: offset of the match inside `snippet` (0 without a match);
/// * `length`: length of the matched term (0 without a match).
Map<String, dynamic> _extractSnippetWithPosition(
    String content, String query) {
  const maxSnippetLength = 300;
  const contextLength = 100;

  final cleanContent = _cleanHtmlContent(content);

  // Offsets found in the lower-cased copy are applied to `cleanContent`;
  // this assumes lower-casing does not change the length of the text.
  final lowerContent = cleanContent.toLowerCase();

  // The cleaned content has single spaces only, so the query is normalised
  // the same way before it is compared against it.
  final normalizedQuery = query.trim().replaceAll(RegExp(r'\s+'), ' ');
  final terms = [
    normalizedQuery,
    ...normalizedQuery.split(' ').where((word) => word.length > 2),
  ];

  var matchStart = -1;
  var matchLength = 0;
  for (final term in terms) {
    // An empty term would "match" at offset 0 and hide the no-match fallback.
    if (term.isEmpty) continue;
    final index = lowerContent.indexOf(term.toLowerCase());
    if (index != -1) {
      matchStart = index;
      matchLength = term.length;
      break;
    }
  }

  if (cleanContent.length <= maxSnippetLength) {
    // Short content is shown whole, but the match is still located so that
    // `position`/`length` mean the same thing as in the truncated case.
    return {
      'snippet': cleanContent,
      'position': matchStart == -1 ? 0 : matchStart,
      'length': matchLength,
    };
  }

  if (matchStart == -1) {
    return {
      'snippet': '${cleanContent.substring(0, maxSnippetLength)}...',
      'position': 0,
      'length': 0,
    };
  }

  return _snippetAround(cleanContent, matchStart, matchLength, contextLength);
}

/// Cuts [text] to [context] characters on each side of the match that starts
/// at [matchStart] and spans [matchLength] characters, adding `...` wherever
/// text was dropped.
Map<String, dynamic> _snippetAround(
    String text, int matchStart, int matchLength, int context) {
  final int start = (matchStart - context).clamp(0, text.length);
  final int end = (matchStart + matchLength + context).clamp(0, text.length);

  final prefix = start > 0 ? '...' : '';
  final suffix = end < text.length ? '...' : '';

  return {
    'snippet': '$prefix${text.substring(start, end)}$suffix',
    // Relative to the snippet, so shifted by the leading ellipsis if present.
    'position': matchStart - start + prefix.length,
    'length': matchLength,
  };
}
|
|
|
|
// Helper method to extract a snippet without position information (legacy)
|
|
/// Returns only the snippet text for [content] and [query], discarding the
/// match position (legacy variant of [_extractSnippetWithPosition]).
///
/// NOTE(review): no call site for this helper is visible in this file.
String _extractSnippet(String content, String query) {
  final snippetInfo = _extractSnippetWithPosition(content, query);
  return snippetInfo['snippet'] as String;
}
|
|
|
|
// Helper method to clean HTML content using regex
|
|
/// Converts an HTML fragment into plain, single-spaced text.
///
/// Tags are replaced by a space, character references are decoded and every
/// run of whitespace is collapsed to one space; the result is trimmed.
///
/// Decoding happens in a single pass, so text produced by one reference is
/// never decoded again (`&amp;lt;` becomes `&lt;`, not `<`). Decimal
/// (`&#233;`) and hexadecimal (`&#xE9;`) references are supported. Named
/// references missing from the table below, and numeric references that are
/// not valid Unicode scalar values, are removed.
String _cleanHtmlContent(String html) {
  if (html.isEmpty) return '';

  // Supported named references, keyed by the name between `&` and `;`.
  const namedEntities = <String, String>{
    'nbsp': ' ',
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'quot': '"',
    'apos': "'",
    'cent': '¢',
    'pound': '£',
    'yen': '¥',
    'euro': '€',
    'copy': '©',
    'reg': '®',
    'aacute': 'á',
    'eacute': 'é',
    'iacute': 'í',
    'oacute': 'ó',
    'uacute': 'ú',
    'ntilde': 'ñ',
    'Aacute': 'Á',
    'Eacute': 'É',
    'Iacute': 'Í',
    'Oacute': 'Ó',
    'Uacute': 'Ú',
    'Ntilde': 'Ñ',
  };

  // Step 1: drop tags, leaving a space so neighbouring words do not merge.
  final withoutTags = html.replaceAll(RegExp(r'<[^>]*>'), ' ');

  // Step 2: decode all character references in one pass.
  // Group 1 = decimal digits, group 2 = hex digits, group 3 = entity name.
  final decoded = withoutTags.replaceAllMapped(
    RegExp(r'&(?:#([0-9]+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z0-9]+));'),
    (match) {
      final decimal = match.group(1);
      final hex = match.group(2);

      if (decimal != null || hex != null) {
        final codePoint = decimal != null
            ? int.tryParse(decimal)
            : int.tryParse(hex!, radix: 16);
        final isValid = codePoint != null &&
            codePoint <= 0x10FFFF &&
            (codePoint < 0xD800 || codePoint > 0xDFFF);
        return isValid ? String.fromCharCode(codePoint) : '';
      }

      return namedEntities[match.group(3)] ?? '';
    },
  );

  // Step 3: collapse whitespace runs and trim the ends.
  return decoded.replaceAll(RegExp(r'\s+'), ' ').trim();
}
|
|
|
|
/// Removes the document identified by [id] from the index.
///
/// Initializes the service first when that has not happened yet.
Future<void> deleteDocument(String id) async {
  if (!_isInitialized) {
    await initialize();
  }
  await _index.deleteDocument(id);
}
|
|
|
|
/// Removes every document from the index.
///
/// Initializes the service first when that has not happened yet.
Future<void> clearIndex() async {
  if (!_isInitialized) {
    await initialize();
  }
  await _index.deleteAllDocuments();
}
|
|
|
|
/// Indexes every database message that is not yet present in the index.
///
/// Messages are read from a freshly opened [AppDatabase]; those with a
/// non-empty body whose id is missing from the index are added in batches
/// of 50. The sync is one-directional: documents that exist only in the
/// index are left untouched.
///
/// [onProgress] is invoked only when there is something to add: once with
/// `0.0` before indexing, after every batch with the completed fraction, and
/// finally with `1.0`. The database is closed when the method finishes,
/// whether it succeeded or threw.
Future<void> syncWithDatabase({ProgressCallback? onProgress}) async {
  if (!_isInitialized) await initialize();

  final database = AppDatabase();
  try {
    // Ids already indexed, for constant-time membership checks below.
    final mimirDocuments = await getAllDocuments();
    final mimirIds = {for (final doc in mimirDocuments) doc['id'] as String};

    final messages = await database.getAllMessages();

    if (kDebugMode) {
      print('📊 Documentos en Mimir: ${mimirDocuments.length}');
      print('📊 Documentos en Base de Datos: ${messages.length}');
    }

    // Collect the missing messages in one pass; copying `body` to a local
    // lets it promote to non-null without `!`.
    final documents = <Map<String, String>>[];
    for (final message in messages) {
      final body = message.body;
      if (body == null || body.isEmpty) continue;
      if (mimirIds.contains(message.id)) continue;
      documents.add({
        'id': message.id,
        'languages_code': message.languagesCode,
        'content': body,
      });
    }

    if (documents.isEmpty) {
      if (kDebugMode) {
        print('✅ Mimir y Base de Datos están sincronizados');
      }
      return;
    }

    if (kDebugMode) {
      print('🔄 Documentos a agregar a Mimir: ${documents.length}');
    }

    onProgress?.call('updating_search_index'.tr(), 0.0);

    if (kDebugMode) {
      print('📝 Documentos válidos para indexar: ${documents.length}');
    }

    const batchSize = 50;
    for (var start = 0; start < documents.length; start += batchSize) {
      final end = start + batchSize < documents.length
          ? start + batchSize
          : documents.length;
      await addDocuments(documents.sublist(start, end));

      // `end` equals the number of documents indexed so far.
      onProgress?.call(
          'updating_search_index'.tr(), end / documents.length);

      if (kDebugMode) {
        print('✓ Progreso de indexación: $end/${documents.length}');
      }
    }

    if (kDebugMode) {
      final finalCount = await getDocumentCount();
      print('✅ Indexación completada. Documentos en Mimir: $finalCount');
    }

    onProgress?.call('search_index_updated'.tr(), 1.0);
  } finally {
    // Awaited so the connection is really closed before the returned future
    // completes and so a failure while closing is not lost.
    // NOTE(review): assumes AppDatabase.close() returns a Future — confirm.
    await database.close();
  }
}
|
|
}
|