From c505eaa4c53d6e12115c6edaeb9dbb4f03ba5f03 Mon Sep 17 00:00:00 2001 From: Nasty Date: Fri, 16 Aug 2024 15:20:06 +0300 Subject: [PATCH] TLDR-748 finishing fixes --- dedoc/api/api_args.py | 2 +- dedoc/api/web/index.html | 26 ++++++++- .../default_structure_extractor.py | 40 +++++++------ .../structure_extractors/patterns/__init__.py | 3 +- .../patterns/abstract_pattern.py | 45 ++------------ .../patterns/bracket_list_pattern.py | 6 +- .../patterns/bracket_roman_list_pattern.py | 6 +- .../patterns/bullet_list_pattern.py | 6 +- .../patterns/dotted_list_pattern.py | 14 ++--- .../patterns/letter_list_pattern.py | 6 +- .../patterns/regexp_pattern.py | 8 ++- .../patterns/roman_list_pattern.py | 6 +- .../patterns/start_word_pattern.py | 8 ++- .../patterns/tag_header_pattern.py | 25 +++++++- .../patterns/tag_list_pattern.py | 22 +++++-- .../patterns/tag_pattern.py | 58 +++++++++++++++++++ .../patterns/tag_type_pattern.py | 18 ------ dedoc/structure_extractors/patterns/utils.py | 15 +++-- dedoc/utils/parameter_utils.py | 7 +++ tests/api_tests/test_api_doctype_default.py | 29 ++++++++-- 20 files changed, 215 insertions(+), 135 deletions(-) delete mode 100644 dedoc/structure_extractors/patterns/tag_type_pattern.py diff --git a/dedoc/api/api_args.py b/dedoc/api/api_args.py index cd75b6e5..8ffdc7b9 100644 --- a/dedoc/api/api_args.py +++ b/dedoc/api/api_args.py @@ -8,7 +8,7 @@ class QueryParameters: # type of document structure parsing document_type: str = Form("", enum=["", "law", "tz", "diploma", "article", "fintoc"], description="Document domain") - patterns: str = Form(None, description='Patterns for default document type (when document_type="")') + patterns: str = Form("", description='Patterns for default document type (when document_type="")') structure_type: str = Form("tree", enum=["linear", "tree"], description="Output structure type") return_format: str = Form("json", enum=["json", "html", "plain_text", "tree", "collapsed_tree", "ujson", "pretty_json"], description="Response representation, most types (except json) are used for debug purposes only") diff --git a/dedoc/api/web/index.html b/dedoc/api/web/index.html index 423dbcfe..d0c8b984 100644 --- a/dedoc/api/web/index.html +++ b/dedoc/api/web/index.html @@ -28,7 +28,7 @@

Parameters configuration

Type of document structure parsing

-
document_type, structure_type, return_format +
document_type, patterns, structure_type, return_format

Type of document structure parsing

+

+

+ Patterns for default structure extractor (document_type="other")
+
+ +
+

+