From c3ad8f40867c976929459b1b92b01b0fc2d6ddda Mon Sep 17 00:00:00 2001 From: bosd Date: Thu, 19 Dec 2024 20:00:54 +0100 Subject: [PATCH 1/2] [REF]: Use 'in' method instead of regex Python's `in` operator can be a lot faster than searching with a regex. According to the PyCon talk about regex performance, the built-in is 1360% faster. Note that keywords are now matched as literal substrings, so regex patterns used as template keywords no longer match. --- src/invoice2data/extract/invoice_template.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/invoice2data/extract/invoice_template.py b/src/invoice2data/extract/invoice_template.py index 38c30660..c5d729f0 100644 --- a/src/invoice2data/extract/invoice_template.py +++ b/src/invoice2data/extract/invoice_template.py @@ -128,24 +128,24 @@ def matches_input(self, extracted_str: str) -> bool: Returns: bool: True if the extracted string matches the template keywords, - False otherwise. + False otherwise. """ - if all([re.search(keyword, extracted_str) for keyword in self["keywords"]]): - # All keyword patterns matched + if all([keyword in extracted_str for keyword in self["keywords"]]): + # All keywords found if self["exclude_keywords"]: if any( [ - re.search(exclude_keyword, extracted_str) + exclude_keyword in extracted_str for exclude_keyword in self["exclude_keywords"] ] ): - # At least one exclude_keyword matches + # At least one exclude_keyword found logger.debug( "Template: %s | Keywords matched. Exclude keyword found!", self["template_name"], ) return False - # No exclude_keywords or none match, template is good + # No exclude_keywords or none found, template is good logger.debug( "Template: %s | Keywords matched. 
No exclude keywords found.", self["template_name"], From 44ff3709311b6c1f4d5d954671041e85908ef1c1 Mon Sep 17 00:00:00 2001 From: bosd Date: Thu, 19 Dec 2024 20:08:05 +0100 Subject: [PATCH 2/2] [REF]: Update invoicetemplates no regexes are allowed as keywords --- .../extract/templates/com/com.flipkart.WSRetail.json | 2 +- src/invoice2data/extract/templates/nl/nl.be.coolblue.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/invoice2data/extract/templates/com/com.flipkart.WSRetail.json b/src/invoice2data/extract/templates/com/com.flipkart.WSRetail.json index 69ee14b7..42b3bb2b 100644 --- a/src/invoice2data/extract/templates/com/com.flipkart.WSRetail.json +++ b/src/invoice2data/extract/templates/com/com.flipkart.WSRetail.json @@ -6,7 +6,7 @@ "invoice_number": "InvoiceNo:(\\S+)", "order_id": "OrderID:(\\w{2}\\d{16,18})" }, - "keywords": ["flipkart", "WS\\s?Retail", "OD"], + "keywords": ["flipkart", "WS Retail", "OD"], "options": { "currency": "INR", "remove_whitespace": true diff --git a/src/invoice2data/extract/templates/nl/nl.be.coolblue.yml b/src/invoice2data/extract/templates/nl/nl.be.coolblue.yml index 8f2d7a85..501b68cb 100644 --- a/src/invoice2data/extract/templates/nl/nl.be.coolblue.yml +++ b/src/invoice2data/extract/templates/nl/nl.be.coolblue.yml @@ -107,7 +107,7 @@ fields: price_subtotal: float keywords: - Coolblue - - (NL810433941B01|BE0867686774) + - Klantnummer - factuur - € options: