Skip to content

Commit

Permalink
feat: More_work_on_search (#442)
Browse files Browse the repository at this point in the history
* feat: pick the distance between words in search

* fix: added fuzzy search and better layout for search

* Breaking: added reference to index

* fix: changed full text search layout

* fix: merged book list and book tree to one place, removing topics.

* fix: if index does not match, reset the index.

* fix: roll back to query parser for fuzzy search.
* disable distance for fuzzy searches, as they aren't supported by tantivy.

* feat: added results counts for every facet(topic)

* fix: fixed the bug - endless loop

* feat: added search results counts to books.
feat: select content of text controller for library browser and find ref

* minor fixes

* feat: order search results by relevance/catalogue order

* fix: returned selectionArea

* feat: added option to reset index

* feat: faceted search

* refactor: reorganizes the code

* Update flutter.yml

* chore: fixed use of search_engine from git

* Merge branch 'more_work_on_search' of https://github.com/Sivan22/otzaria into more_work_on_search

* Update flutter.yml

* chore: fixed typo

* Merge branch 'more_work_on_search' of https://github.com/Sivan22/otzaria into more_work_on_search

* feat: adding selection using ctrl

* Merge branch 'main' into more_work_on_search
  • Loading branch information
Sivan22 authored Jan 28, 2025
1 parent 2a04d1b commit b4d0650
Show file tree
Hide file tree
Showing 33 changed files with 886 additions and 518 deletions.
1 change: 1 addition & 0 deletions .github/workflows/flutter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ jobs:
with:
channel: stable
cache: true
- run: sudo apt install ninja-build
- run: flutter pub get
- run: flutter build apk
- name: Upload apk
Expand Down
23 changes: 22 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,22 @@ migrate_working_dir/
.flutter-plugins-dependencies
.pub-cache/
.pub/
/build/
build/
/windows/
android/
linux/
macos/
ios/
.dart_tool
web/
images/
fonts/
assets/
.idea/




# Symbolication related
app.*.symbols

Expand All @@ -43,3 +57,10 @@ app.*.map.json
/android/app/debug
/android/app/profile
/android/app/release

.git/
.vscode/
android/
build/
fonts/

2 changes: 2 additions & 0 deletions lib/data/data_providers/file_system_data_provider.dart
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class FileSystemData {
category.books.add(
PdfBook(
title: title,
category: category,
path: entity.path,
author: metadata[title]?['author'],
heShortDesc: metadata[title]?['heShortDesc'],
Expand All @@ -113,6 +114,7 @@ class FileSystemData {
final title = getTitleFromPath(entity.path);
category.books.add(TextBook(
title: title,
category: category,
author: metadata[title]?['author'],
heShortDesc: metadata[title]?['heShortDesc'],
pubDate: metadata[title]?['pubDate'],
Expand Down
4 changes: 1 addition & 3 deletions lib/data/data_providers/isar_data_provider.dart
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class IsarDataProvider {
}
isar.write((isar) => isar.refs.putAll(refs));
} catch (e) {
print(' Failed creating refs for ${book.title} $e');
print('Failed creating refs for ${book.title} $e');
}
}

Expand Down Expand Up @@ -144,7 +144,6 @@ class IsarDataProvider {
pdfBook: true,
pdfPath: pdfBooks[i].path,
);
print('Adding Pdf ref: ${ref.ref}');
isar.write((isar) => isar.refs.put(ref));
}
}
Expand Down Expand Up @@ -242,7 +241,6 @@ class IsarDataProvider {
linesNumOfbooksDone.value = 0;

for (TextBook book in books) {
print('Adding lines for ${book.title}');
await addLinesForBook(book);
linesNumOfbooksDone.value = books.indexOf(book) + 1;
}
Expand Down
58 changes: 40 additions & 18 deletions lib/data/data_providers/tantivy_data_provider.dart
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class TantivyDataProvider {
/// Instance of the search engine pointing to the index directory
late Future<SearchEngine> engine;

/// Instance of the search engine pointing to the index directory
late Future<SearchEngine> engine;

static final TantivyDataProvider _singleton = TantivyDataProvider();
static TantivyDataProvider instance = _singleton;

Expand All @@ -40,14 +43,18 @@ class TantivyDataProvider {
/// Uses Hive for persistent storage of indexed book records, storing them in the 'index'
/// subdirectory of the configured library path.
TantivyDataProvider() {
reopenIndex();
}

void reopenIndex() {
String indexPath = (Settings.getValue('key-library-path') ?? 'C:/אוצריא') +
Platform.pathSeparator +
'index';

engine = SearchEngine.newInstance(path: indexPath);

//test the engine
searchTexts('בראשית', ['בראשית'], 1);
searchTexts('בראשית', ['/'], 1);

booksDone = Hive.box(
name: 'books_indexed',
Expand All @@ -67,14 +74,13 @@ class TantivyDataProvider {
.put('key-books-done', booksDone);
}

Future<int> countTexts(String query, List<String> books, String topics,
Future<int> countTexts(String query, List<String> books, List<String> facets,
{bool fuzzy = false, int distance = 2}) async {
final index = await engine;
if (!fuzzy) {
query = distance > 0 ? '"$query"~$distance' : '"$query"';
}
return index.count(
query: query, books: books, topics: topics, fuzzy: fuzzy);
return index.count(query: query, facets: facets, fuzzy: fuzzy);
}

/// Performs a one-shot (non-streaming) search operation across indexed texts.
Expand All @@ -86,8 +92,10 @@ class TantivyDataProvider {
///
/// Returns a Future containing a list of search results
Future<List<SearchResult>> searchTexts(
String query, List<String> books, int limit,
{bool fuzzy = false, int distance = 2}) async {
String query, List<String> facets, int limit,
{ResultsOrder order = ResultsOrder.relevance,
bool fuzzy = false,
int distance = 2}) async {
SearchEngine index;
try {
index = await engine;
Expand All @@ -102,7 +110,6 @@ class TantivyDataProvider {
"PanicException(Failed to create index: SchemaError(\"An index exists but the schema does not match.\"))") {
Directory indexDirectory = Directory(indexPath);
Hive.box(name: 'books_indexed', directory: indexPath).close();
print('Deleting index and creating a new one');
indexDirectory.deleteSync(recursive: true);
indexDirectory.createSync(recursive: true);
engine = SearchEngine.newInstance(path: indexPath);
Expand All @@ -115,7 +122,7 @@ class TantivyDataProvider {
query = distance > 0 ? '"$query"~$distance' : '"$query"';
}
return await index.search(
query: query, books: books, limit: limit, fuzzy: fuzzy);
query: query, facets: facets, limit: limit, fuzzy: fuzzy, order: order);
}

/// Performs an asynchronous stream-based search operation across indexed texts.
Expand All @@ -127,10 +134,10 @@ class TantivyDataProvider {
///
/// Returns a Stream of search results that can be listened to for real-time updates
Stream<List<SearchResult>> searchTextsStream(
String query, List<String> books, int limit, bool fuzzy) async* {
String query, List<String> facets, int limit, bool fuzzy) async* {
final index = await engine;
yield* index.searchStream(
query: query, books: books, limit: limit, fuzzy: fuzzy);
query: query, facets: facets, limit: limit, fuzzy: fuzzy);
}

/// Indexes all books in the provided library within the specified range.
Expand All @@ -154,13 +161,13 @@ class TantivyDataProvider {
if (!isIndexing.value) {
return;
}
print('Adding ${book.title} to index');
try {
// Handle different book types appropriately
if (book is TextBook) {
await addTextsToTantivy(book);
} else if (book is PdfBook) {
await addPdfTextsToTantivy(book);
await addPdfTextsToTantivy(book);
}
} catch (e) {
print('Error adding ${book.title} to index: $e');
Expand All @@ -186,19 +193,21 @@ class TantivyDataProvider {
var text = await book.text;
final title = book.title;
final topics = "/${book.topics.replaceAll(', ', '/')}";
final topics = "/${book.topics.replaceAll(', ', '/')}";

// Check if book was already indexed using content hash
final hash = sha1.convert(utf8.encode(text)).toString();
if (booksDone.contains(hash)) {
print('${book.title} already in index');
numOfbooksDone.value = numOfbooksDone.value! + 1;
return;
}

// Preprocess text by removing HTML and vowel marks


final texts = text.split('\n');
List<String> reference = [];
List<String> reference = [];
// Index each line separately
for (int i = 0; i < texts.length; i++) {
if (!isIndexing.value) {
Expand All @@ -220,10 +229,10 @@ class TantivyDataProvider {
line = stripHtmlIfNeeded(line);
line = removeVolwels(line);
index.addDocument(
id: BigInt.from(hashCode + i),
id: BigInt.from(DateTime.now().microsecondsSinceEpoch),
title: title,
reference: stripHtmlIfNeeded(reference.join(', ')),
topics: topics,
topics: '$topics/$title',
text: line,
segment: BigInt.from(i),
isPdf: false,
Expand All @@ -234,7 +243,6 @@ class TantivyDataProvider {
await index.commit();
booksDone.add(hash);
saveBooksDoneToDisk();
print('Added ${book.title} to index');
numOfbooksDone.value = numOfbooksDone.value! + 1;
}

Expand All @@ -246,14 +254,14 @@ class TantivyDataProvider {
/// 1. Computing a hash of the PDF file to check for previous indexing
/// 2. Extracting text from each page
/// 3. Splitting page text into lines and indexing each line separately
addPdfTextsToTantivy(PdfBook book) async {
addPdfTextsToTantivy(PdfBook book) async {
final index = await engine;

// Check if PDF was already indexed using file hash
final data = await File(book.path).readAsBytes();
final hash = sha1.convert(data).toString();
if (booksDone.contains(hash)) {
print('${book.title} already in index');
numOfbooksDone.value = numOfbooksDone.value! + 1;
return;
}
Expand All @@ -262,6 +270,7 @@ class TantivyDataProvider {
final pages = await PdfDocument.openData(data).then((value) => value.pages);
final title = book.title;
final topics = "/${book.topics.replaceAll(', ', '/')}";
final topics = "/${book.topics.replaceAll(', ', '/')}";

// Process each page
for (int i = 0; i < pages.length; i++) {
Expand All @@ -272,10 +281,11 @@ class TantivyDataProvider {
return;
}
index.addDocument(
id: BigInt.from(DateTime.now().microsecondsSinceEpoch),
id: BigInt.from(DateTime.now().microsecondsSinceEpoch),
title: title,
reference: '$title, עמוד ${i + 1}',
topics: topics,
topics: '$topics/$title',
text: texts[j],
segment: BigInt.from(i),
isPdf: true,
Expand All @@ -286,7 +296,19 @@ class TantivyDataProvider {
await index.commit();
booksDone.add(hash);
saveBooksDoneToDisk();
print('Added ${book.title} to index');
numOfbooksDone.value = numOfbooksDone.value! + 1;
}

/// Clears the search index and the record of already-indexed books.
///
/// Awaits the engine, calls `clear()` on it, then empties [booksDone] and
/// calls [saveBooksDoneToDisk] so the emptied record is what gets stored.
///
/// Returns a future that completes once these steps have run, so callers can
/// await completion and handle errors. Callers that ignore the result keep
/// working as before.
Future<void> clear() async {
  final index = await engine;
  await index.clear();
  booksDone.clear();
  saveBooksDoneToDisk();
}

/// Requests cancellation of indexing and resets the progress notifiers.
///
/// Sets [isIndexing] to `false`; the indexing routines check this flag and
/// return early when it is unset. Also resets [numOfbooksDone] and
/// [numOfbooksTotal] to `null`.
///
/// The method does no asynchronous work, so it is a plain synchronous call.
void cancelIndexing() {
  isIndexing.value = false;
  numOfbooksDone.value = null;
  numOfbooksTotal.value = null;
}
}
3 changes: 0 additions & 3 deletions lib/main.dart
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,5 @@ void createDirectoryIfNotExists(String path) {
Directory directory = Directory(path);
if (!directory.existsSync()) {
directory.createSync(recursive: true);
print('Directory created: $path');
} else {
print('Directory already exists: $path');
}
}
20 changes: 19 additions & 1 deletion lib/models/app_model.dart
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ import 'package:flutter_settings_screens/flutter_settings_screens.dart';
import 'package:fuzzywuzzy/fuzzywuzzy.dart';
import 'package:hive/hive.dart';
import 'package:otzaria/data/data_providers/file_system_data_provider.dart';
import 'package:otzaria/data/data_providers/tantivy_data_provider.dart';
import 'package:otzaria/data/repository/data_repository.dart';
import 'package:otzaria/models/bookmark.dart';
import 'package:otzaria/models/books.dart';
import 'package:otzaria/models/library.dart';
import 'package:otzaria/models/tabs.dart';
import 'package:otzaria/models/tabs/pdf_tab.dart';
import 'package:otzaria/models/tabs/searching_tab.dart';
import 'package:otzaria/models/tabs/tabs.dart';
import 'package:otzaria/models/tabs/text_tab.dart';
import 'package:otzaria/models/workspace.dart';
import 'package:otzaria/utils/calendar.dart';
import 'package:otzaria/utils/text_manipulation.dart' as utils;
Expand Down Expand Up @@ -118,6 +122,10 @@ class AppModel with ChangeNotifier {
Settings.getValue<bool>('key-use-fast-search') ?? true,
);

final ValueNotifier<bool> replaceHolyNames = ValueNotifier<bool>(
Settings.getValue<bool>('key-replace-holy-names') ?? true,
);

/// Focus node for the book locator search field
FocusNode bookLocatorFocusNode = FocusNode();

Expand Down Expand Up @@ -206,6 +214,15 @@ class AppModel with ChangeNotifier {
isDarkMode.addListener(() {
notifyListeners();
});
() async {
//Check if index is up to date
final totalBooks = (await library).getAllBooks().length;
final indexedBooks = TantivyDataProvider.instance.booksDone.length;
if (!TantivyDataProvider.instance.isIndexing.value &&
totalBooks - indexedBooks > 100) {
DataRepository.instance.addAllTextsToTantivy(await library);
}
}();
}

/// Opens a book in a new tab.
Expand Down Expand Up @@ -550,6 +567,7 @@ class AppModel with ChangeNotifier {
libraryPath = Settings.getValue<String>('key-library-path') ?? libraryPath;
FileSystemData.instance.libraryPath = libraryPath;
library = data.getLibrary();
TantivyDataProvider.instance.reopenIndex();
notifyListeners();
}
}
Expand Down
Loading

0 comments on commit b4d0650

Please sign in to comment.