diff --git a/dedoc/metadata_extractors/abstract_metadata_extractor.py b/dedoc/metadata_extractors/abstract_metadata_extractor.py index e278d84a..602ee68e 100644 --- a/dedoc/metadata_extractors/abstract_metadata_extractor.py +++ b/dedoc/metadata_extractors/abstract_metadata_extractor.py @@ -31,13 +31,13 @@ def extract_metadata(self, """ Extract metadata from file if possible, i.e. method :meth:`can_extract` returned True. - :type directory: path to the directory where the original and converted files are located - :type filename: name of the file after renaming (for example 23141.doc). \ + :param directory: path to the directory where the original and converted files are located + :param filename: name of the file after renaming (for example 23141.doc). \ The file gets a new name during processing by the dedoc manager (if used) - :type converted_filename: name of the file after renaming and conversion (for example 23141.docx) - :type original_filename: name of the file before renaming - :type parameters: additional parameters for document parsing - :type other_fields: other fields that should be added to the document's metadata - :return: document content with added metadata attribute (dict with information about the document) + :param converted_filename: name of the file after renaming and conversion (for example 23141.docx) + :param original_filename: name of the file before renaming + :param parameters: additional parameters for document parsing + :param other_fields: other fields that should be added to the document's metadata + :return: dict with metadata information about the document """ pass diff --git a/dedoc/metadata_extractors/metadata_extractor_composition.py b/dedoc/metadata_extractors/metadata_extractor_composition.py index 874f0f75..e9c182d4 100644 --- a/dedoc/metadata_extractors/metadata_extractor_composition.py +++ b/dedoc/metadata_extractors/metadata_extractor_composition.py @@ -24,7 +24,7 @@ def extract_metadata(self, parameters: Optional[dict] = 
None, other_fields: Optional[dict] = None) -> dict: """ - Add metadata to the document using one of the extractors if suitable extractor was found. + Extract metadata using one of the extractors if a suitable extractor was found. Look to the method :meth:`~dedoc.metadata_extractors.AbstractMetadataExtractor.extract_metadata` of the class :class:`~dedoc.metadata_extractors.AbstractMetadataExtractor` documentation to get the information about method's parameters. """ diff --git a/docs/source/getting_started/usage.rst b/docs/source/getting_started/usage.rst index d6c6c498..1114cb87 100644 --- a/docs/source/getting_started/usage.rst +++ b/docs/source/getting_started/usage.rst @@ -204,7 +204,7 @@ To extract metadata, one can add them to the document using :meth:`~dedoc.metada .. literalinclude:: ../_static/code_examples/dedoc_usage_tutorial.py :language: python - :lines: 77-83 + :lines: 79-83 As we see, the attribute ``metadata`` has been filled with some metadata fields. The list of common fields for any metadata extractor along with the specific fields diff --git a/examples/create_unstructured_document.py b/examples/create_unstructured_document.py index 59e4d02a..cf724e1a 100644 --- a/examples/create_unstructured_document.py +++ b/examples/create_unstructured_document.py @@ -58,8 +58,5 @@ # HierarchyLevel(1, 1) for 1. # HierarchyLevel(1, 2) for 1.1. # HierarchyLevel(1, 4) for 1.2.1.1. and so on -metadata = BaseMetadataExtractor().extract_metadata(directory="./", - filename="example.docx", - converted_filename="example.doc", - original_filename="example.docx") +metadata = BaseMetadataExtractor().extract_metadata(directory="./", filename="example.docx", converted_filename="example.doc", original_filename="example.docx") unstructured_document.metadata = metadata