Merge branch 'release/0.1.0'
Demirrr committed Feb 24, 2022
2 parents e06de9a + a1a3a7b commit 65ac7ed
Showing 13 changed files with 161 additions and 372 deletions.
43 changes: 21 additions & 22 deletions README.md
@@ -1,23 +1,28 @@
# Knowledge Graph Embeddings at Scale
This open-source project facilitates learning embeddings of large knowledge graphs.
To scale to large knowledge graphs, we rely on [DASK](https://dask.org/) and [PytorchLightning](https://www.pytorchlightning.ai/).
Through [DASK](https://dask.org/), we utilize multiple CPUs to process the input data, while
[PytorchLightning](https://www.pytorchlightning.ai/) allows us to use knowledge graph embedding models in a hardware-agnostic manner.

This open-source project is designed to ease real-world applications of knowledge graph embeddings.
With this aim, we rely on
1. [DASK](https://dask.org/) to use multiple CPUs when preprocessing a large knowledge graph,
2. [PytorchLightning](https://www.pytorchlightning.ai/) to learn knowledge graph embeddings via multiple CPUs, GPUs, TPUs, or a computing cluster, and
3. [Gradio](https://gradio.app/) to ease the deployment of pre-trained models.


### Installation
First clone the repository:
Clone the repository:
```
git clone https://github.com/dice-group/DAIKIRI-Embedding.git
```
Then obtain the required libraries:
Install dependencies via conda:
```
conda env create -f environment.yml
conda activate daikiri
wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip
unzip KGs.zip
python -m pytest -x tests
```
### Manual Installation
or via pip:
```
# ensure that python 3.9 is available
pip install -r requirements.txt
```
or manually:
```
conda create -n daikiri python=3.9
conda activate daikiri
@@ -28,6 +33,9 @@ pip install scikit-learn==1.0.2
pip install pytest==6.2.5
pip install gradio==2.7.5.2
pip install pyarrow==6.0.1
```
To test the installation:
```
wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip
unzip KGs.zip
python -m pytest -x tests
```
@@ -62,20 +70,11 @@ Please contact: ```[email protected]``` or ```[email protected]```, if
- For more, please see [Hobbit Data](https://hobbitdata.informatik.uni-leipzig.de/KGE/)

### Available Models
1. Multiplicative based KGE models:
1. [DistMult](https://arxiv.org/pdf/1412.6575.pdf)
2. [ComplEx](https://arxiv.org/pdf/1606.06357.pdf)
3. [QMult](https://proceedings.mlr.press/v157/demir21a.html)
4. [OMult](https://proceedings.mlr.press/v157/demir21a.html)
2. Feed Forward Neural Models
1. [Shallom](https://arxiv.org/pdf/2101.09090.pdf)
3. Convolutional Neural models
1. [ConEx](https://openreview.net/forum?id=6T45-4TFqaX&invitationId=eswc-conferences.org/ESWC/2021/Conference/Research_Track/Paper49/-/Camera_Ready_Revision&referrer=%5BTasks%5D(%2Ftasks))
2. [ConvQ](https://proceedings.mlr.press/v157/demir21a.html)
3. [ConvO](https://proceedings.mlr.press/v157/demir21a.html)
1. Multiplicative based KGE models: [DistMult](https://arxiv.org/pdf/1412.6575.pdf), [ComplEx](https://arxiv.org/pdf/1606.06357.pdf), [QMult](https://proceedings.mlr.press/v157/demir21a.html), and [OMult](https://proceedings.mlr.press/v157/demir21a.html)
2. Feed Forward Neural Models: [Shallom](https://arxiv.org/pdf/2101.09090.pdf)
3. Convolutional Neural models: [ConEx](https://openreview.net/forum?id=6T45-4TFqaX&invitationId=eswc-conferences.org/ESWC/2021/Conference/Research_Track/Paper49/-/Camera_Ready_Revision&referrer=%5BTasks%5D(%2Ftasks)), [ConvQ](https://proceedings.mlr.press/v157/demir21a.html), [ConvO](https://proceedings.mlr.press/v157/demir21a.html)
4. Contact us to add your favorite one :)


### Training
1. A dataset must be located in a folder, e.g. 'KGs/YAGO3-10', laid out as sketched below.

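A hedged sketch of the expected dataset layout, assuming the standard train/validation/test split files shipped in the KGs.zip archive (these file names are an assumption, not confirmed by this diff):
```
KGs/YAGO3-10/
├── train.txt   # tab-separated triples: subject  predicate  object
├── valid.txt
└── test.txt
```
For illustration only, a hypothetical training invocation; the entry script and flag names below are assumptions and should be checked against the repository:
```
python main.py --path_dataset_folder 'KGs/YAGO3-10' --model 'ConEx'
```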
2 changes: 1 addition & 1 deletion continuous_training.py
@@ -20,7 +20,7 @@ def __init__(self, args):
if __name__ == '__main__':
parser = argparse.ArgumentParser(add_help=False)
# Dataset and storage related
parser.add_argument("--path_experiment_folder", type=str, default="DAIKIRI_Storage/2022-02-04 15:02:25.958956",
parser.add_argument("--path_experiment_folder", type=str, default="DAIKIRI_Storage/2022-02-24 12:17:41.555572",
help="The path of a folder containing pretrained model")
# Training Parameters
parser.add_argument("--num_epochs", type=int, default=10,
15 changes: 15 additions & 0 deletions core/callbacks.py
@@ -0,0 +1,15 @@
# Callbacks attached to the PyTorch-Lightning Trainer (used in core/executer.py)
from pytorch_lightning.callbacks import Callback


class PrintCallback(Callback):
def __init__(self):
super().__init__()

def on_train_start(self, trainer, model):
print(model)
print(model.summarize())
print("Training is started!")

def on_train_end(self, trainer, pl_module):
print("\nTraining is done.")
17 changes: 8 additions & 9 deletions core/executer.py
@@ -4,6 +4,7 @@
from .helper_classes import LabelRelaxationLoss, LabelSmoothingLossCanonical
from .dataset_classes import StandardDataModule, KvsAll, CVDataModule
from .knowledge_graph import KG
from .callbacks import PrintCallback
import torch
from torch import nn
from torch.nn import functional as F
@@ -18,7 +19,6 @@
import dask.dataframe as dd
import time
from pytorch_lightning.plugins import DDPPlugin
from pytorch_lightning.callbacks import Callback
from pytorch_lightning import Trainer, seed_everything
import logging
from collections import defaultdict
@@ -28,6 +28,7 @@
warnings.filterwarnings(action="ignore", category=DeprecationWarning)
seed_everything(1, workers=True)


# TODO: Execute can inherit from Trainer and Evaluator Classes
# By doing so we can increase the modularity of our code.
class Execute:
@@ -69,10 +70,10 @@ def read_input_data(args) -> KG:
return kg

@staticmethod
def reload_input_data(p: str) -> KG:
def reload_input_data(storage_path: str) -> KG:
# 1. Read & Parse input data
print("1. Reload Parsed Input Data")
return KG(deserialize_flag=p)
return KG(deserialize_flag=storage_path)

def start(self) -> dict:
"""
@@ -118,11 +119,13 @@ def train_and_eval(self) -> BaseKGE:
2c. Train a model
"""
print('------------------- Train & Eval -------------------')
# 1. Create Pytorch-lightning Trainer object from input configuration


if self.args.gpus:
self.trainer = pl.Trainer.from_argparse_args(self.args, plugins=[DDPPlugin(find_unused_parameters=False)])
else:
self.trainer = pl.Trainer.from_argparse_args(self.args)
self.trainer = pl.Trainer.from_argparse_args(self.args, callbacks=[PrintCallback()])

# 2. Check whether validation and test datasets are available.
if self.dataset.is_valid_test_available():
if self.args.scoring_technique == 'NegSample':
@@ -249,11 +252,7 @@ def get_batch_1_to_N(self, input_vocab, triples, idx, output_dim) -> Tuple[np.ar

@staticmethod
def model_fitting(trainer, model, train_dataloaders) -> None:
print(model)
print(model.summarize())
print("Model fitting...")
trainer.fit(model, train_dataloaders=train_dataloaders)
print("Done!")

def training_kvsall(self):
"""