Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Unstructured.IO API support #648

Merged
merged 4 commits into from
Nov 22, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fixed linting
András Otártics committed Nov 22, 2023
commit b7521f424ba714e2cd3729ae4fd4f0f0271fb962
67 changes: 33 additions & 34 deletions llama_hub/file/unstructured/base.py
Original file line number Diff line number Diff line change
@@ -11,33 +11,34 @@
from llama_index.readers.schema.base import Document
import json


class UnstructuredReader(BaseReader):
    """General unstructured text reader for a variety of files."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Init params.

        Keyword Args:
            url: Base URL of an Unstructured.io API server. Setting it
                switches parsing to API mode.
            api: Explicit bool override for API mode (takes precedence
                over the default implied by ``url``).
            api_key: API key sent along with API requests.
        """
        super().__init__(*args)  # not passing kwargs to parent bc it cannot accept it

        self.api = False  # we default to local
        if "url" in kwargs:
            self.server_url = str(kwargs["url"])
            self.api = True  # if url was set, switch to api
        else:
            self.server_url = "http://localhost:8000"

        if "api" in kwargs:
            self.api = kwargs["api"]

        self.api_key = ""
        if "api_key" in kwargs:
            self.api_key = kwargs["api_key"]

        # Prerequisite for Unstructured.io to work
        import nltk

        nltk.download("punkt")
        nltk.download("averaged_perceptron_tagger")

    def load_data(
        self,
        file: Path,
        extra_info: Optional[Dict] = None,
        split_documents: Optional[bool] = False,
    ) -> List[Document]:
        """Load data using Unstructured.io.

        Depending on the construction, if ``url`` was set or ``api=True``,
        the file is parsed through the Unstructured.io API; otherwise it
        is parsed locally.

        Args:
            file: Path of the file to parse.
            extra_info: Extra metadata merged into every returned document.
            split_documents: If True, return one Document per parsed
                element; otherwise join all elements into one Document.

        Returns:
            List of documents.
        """
        if self.api:
            # Parse through the hosted Unstructured.io API.
            from unstructured.partition.api import partition_via_api

            elements = partition_via_api(
                filename=str(file),
                api_key=self.api_key,
                api_url=self.server_url + "/general/v0/general",
            )
        else:
            # Parse file locally.
            from unstructured.partition.auto import partition

            elements = partition(filename=str(file))

        # Process elements
        docs = []
        if split_documents:
            for node in elements:
                metadata = {}
                if hasattr(node, "metadata"):
                    # Load metadata fields
                    for field, val in vars(node.metadata).items():
                        if field == "_known_field_names":
                            continue
                        # removing coordinates because it does not serialize
                        # and dont want to bother with it
                        if field == "coordinates":
                            continue
                        # removing bc it might cause interference
                        if field == "parent_id":
                            continue
                        metadata[field] = val

                if extra_info is not None:
                    metadata.update(extra_info)

                metadata["filename"] = str(file)
                docs.append(Document(text=node.text, extra_info=metadata))

        else:
            text_chunks = [" ".join(str(el).split()) for el in elements]

            metadata = {}

            if extra_info is not None:
                metadata.update(extra_info)

            metadata["filename"] = str(file)
            # Create a single document by joining all the texts
            docs.append(Document(text="\n\n".join(text_chunks), extra_info=metadata))

        return docs