From 3207daa7bd6b59e7c466970b92923a640ed8e648 Mon Sep 17 00:00:00 2001 From: Pierre-Loic Doulcet Date: Tue, 10 Sep 2024 10:20:57 -0700 Subject: [PATCH 1/6] do not attach a filepath when a stream of bytes is passed --- llama_parse/base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llama_parse/base.py b/llama_parse/base.py index 8468f40..4046a86 100644 --- a/llama_parse/base.py +++ b/llama_parse/base.py @@ -419,7 +419,8 @@ async def _aget_json( result = await self._get_job_result(job_id, "json") result["job_id"] = job_id - result["file_path"] = file_path + if isinstance(file_path, str): + result["file_path"] = file_path return [result] except Exception as e: @@ -506,7 +507,11 @@ async def aget_images( image["path"] = image_path image["job_id"] = job_id - image["original_pdf_path"] = result["file_path"] + + if isinstance(result["file_path"], str): + image["original_file_path"] = result["file_path"] + else: + image["original_file_path"] = None image["page_number"] = page["page"] with open(image_path, "wb") as f: image_url = f"{self.base_url}/api/parsing/job/{job_id}/result/image/{image_name}" From 1c453397de08b665d36dc1b8d369459f4154e5a2 Mon Sep 17 00:00:00 2001 From: Pierre-Loic Doulcet Date: Tue, 10 Sep 2024 10:30:28 -0700 Subject: [PATCH 2/6] trailing whitespaces --- llama_parse/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llama_parse/base.py b/llama_parse/base.py index 4046a86..8db4161 100644 --- a/llama_parse/base.py +++ b/llama_parse/base.py @@ -416,13 +416,13 @@ async def _aget_json( job_id = await self._create_job(file_path, extra_info=extra_info) if self.verbose: print("Started parsing the file under job_id %s" % job_id) - result = await self._get_job_result(job_id, "json") result["job_id"] = job_id + if isinstance(file_path, str): result["file_path"] = file_path + return [result] - except Exception as e: file_repr = file_path if isinstance(file_path, str) else "" print(f"Error while parsing the 
file '{file_repr}':", e) @@ -512,6 +512,7 @@ async def aget_images( image["original_file_path"] = result["file_path"] else: image["original_file_path"] = None + image["page_number"] = page["page"] with open(image_path, "wb") as f: image_url = f"{self.base_url}/api/parsing/job/{job_id}/result/image/{image_name}" From 42a98f921ab3a7b4557085ea7dc9db05e4c842ff Mon Sep 17 00:00:00 2001 From: Pierre-Loic Doulcet Date: Tue, 10 Sep 2024 10:30:48 -0700 Subject: [PATCH 3/6] Update llama_parse/base.py Co-authored-by: Logan --- llama_parse/base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llama_parse/base.py b/llama_parse/base.py index 8db4161..3ebf0ad 100644 --- a/llama_parse/base.py +++ b/llama_parse/base.py @@ -508,10 +508,7 @@ async def aget_images( image["path"] = image_path image["job_id"] = job_id - if isinstance(result["file_path"], str): - image["original_file_path"] = result["file_path"] - else: - image["original_file_path"] = None + image["original_file_path"] = result.get("file_path", None) image["page_number"] = page["page"] with open(image_path, "wb") as f: From 8aa08c590ccb063bf447c6bb6417ca9c7c204834 Mon Sep 17 00:00:00 2001 From: Pierre-Loic Doulcet Date: Tue, 10 Sep 2024 10:30:53 -0700 Subject: [PATCH 4/6] Update llama_parse/base.py Co-authored-by: Logan --- llama_parse/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_parse/base.py b/llama_parse/base.py index 3ebf0ad..eb2aa27 100644 --- a/llama_parse/base.py +++ b/llama_parse/base.py @@ -419,8 +419,8 @@ async def _aget_json( result = await self._get_job_result(job_id, "json") result["job_id"] = job_id - if isinstance(file_path, str): - result["file_path"] = file_path + if not isinstance(file_path, (bytes, BufferedIOBase)): + result["file_path"] = str(file_path) return [result] except Exception as e: From f603f507b682c6a0dde78adb41e1b21bf000c633 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Tue, 10 Sep 2024 11:32:50 -0600 Subject: [PATCH 
5/6] linting --- llama_parse/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llama_parse/base.py b/llama_parse/base.py index eb2aa27..9937963 100644 --- a/llama_parse/base.py +++ b/llama_parse/base.py @@ -421,7 +421,7 @@ async def _aget_json( if not isinstance(file_path, (bytes, BufferedIOBase)): result["file_path"] = str(file_path) - + return [result] except Exception as e: file_repr = file_path if isinstance(file_path, str) else "" @@ -507,9 +507,9 @@ async def aget_images( image["path"] = image_path image["job_id"] = job_id - + image["original_file_path"] = result.get("file_path", None) - + image["page_number"] = page["page"] with open(image_path, "wb") as f: image_url = f"{self.base_url}/api/parsing/job/{job_id}/result/image/{image_name}" From b706363f114707d12f8ff462fbb3d16cdbeb277b Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Tue, 10 Sep 2024 11:45:07 -0600 Subject: [PATCH 6/6] vbump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ef33b93..bcdacd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "llama-parse" -version = "0.5.4" +version = "0.5.5" description = "Parse files into RAG-Optimized formats." authors = ["Logan Markewich "] license = "MIT"