Skip to content

Commit

Permalink
Fix tabby partially read
Browse files Browse the repository at this point in the history
  • Loading branch information
sunveil committed Nov 20, 2023
1 parent fa396ef commit 5e37d6c
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,10 @@ def __extract(self, path: str, parameters: dict, warnings: list)\
return all_lines, all_tables, all_tables_on_images, all_attached_images, document_metadata

# In the Java tabby reader, page numbering starts at 1 and end_page is inclusive.
# first_tabby_page = first_page + 1 if first_page is not None else 1
# last_tabby_page = None if last_page is not None and last_page > page_count else last_page
# document = self.__process_pdf(path=path, start_page=first_tabby_page, end_page=last_tabby_page) TODO TLDR-518
first_tabby_page = first_page + 1 if first_page is not None else 1
last_tabby_page = None if last_page is not None and last_page > page_count else last_page
document = self.__process_pdf(path=path, start_page=first_tabby_page, end_page=last_tabby_page)

document = self.__process_pdf(path=path)
pages = document.get("pages", [])
for page in pages[first_page:last_page]:
page_lines = self.__get_lines_with_location(page, file_hash)
Expand Down
Binary file not shown.

0 comments on commit 5e37d6c

Please sign in to comment.