Skip to content

Commit

Permalink
Merge pull request #146 from dice-group/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
Demirrr authored Sep 8, 2023
2 parents 0289c05 + fdf7db2 commit 66ff1cc
Show file tree
Hide file tree
Showing 24 changed files with 468 additions and 325 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/github-actions-python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9.17"]

steps:
- uses: actions/checkout@v3
Expand All @@ -19,13 +19,12 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install -r requirements.txt
- name: Lint with ruff
run: |
ruff --format=github --select=F63,F7,F82 --target-version=py310 dicee/.
ruff --format=github --select=F63,F7,F82 --target-version=py39 dicee/.
- name: Test with pytest
run: |
wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip
wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip --no-check-certificate
unzip KGs.zip
pytest -p no:warnings -x
12 changes: 3 additions & 9 deletions .github/workflows/sphinx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,12 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: "3.10"
# Runs a single command using the runners shell
- name: Run a one-line script
run: echo Hello, world!


- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Build HTML
- name: Build HTML and import
run: |
sphinx-apidoc -o docs dicee/ && make -C docs/ html
- name: Run ghp-import
run: |
mv docs/_build/html docs/ && ghp-import -n -p -f docs/html
sphinx-apidoc -o docs dicee/ && make -C docs/ html && mv docs/_build/html docs/ && ghp-import -n -p -f docs/html
21 changes: 2 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,33 +34,16 @@ Deploy a pre-trained embedding model without writing a single line of code.

``` bash
git clone https://github.com/dice-group/dice-embeddings.git
conda create -n dice python=3.10 --no-default-packages && conda activate dice
conda create -n dice python=3.9 --no-default-packages && conda activate dice
pip3 install -r requirements.txt
```
or
```bash
pip install dicee
```
or
```bash
pip3 install "torch>=2.0.0"
pip3 install "pandas>=1.5.1"
pip3 install "polars>=0.16.14"
pip3 install "scikit-learn>=1.2.2"
pip3 install "pyarrow>=11.0.0"
pip3 install "pytorch-lightning==1.6.4"
pip3 install "pykeen==1.10.1"
pip3 install "zstandard>=0.21.0"
pip3 install "pytest>=7.2.2"
pip3 install "psutil>=5.9.4"
pip3 install "ruff>=0.0.284"
pip3 install "gradio>=3.23.0"
pip3 install "rdflib>=7.0.0"
```

To test the Installation
```bash
wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip
wget https://hobbitdata.informatik.uni-leipzig.de/KG/KGs.zip --no-check-certificate
unzip KGs.zip
pytest -p no:warnings -x # it takes circa 15 minutes
pytest -p no:warnings --lf # run only the last failed test
Expand Down
119 changes: 55 additions & 64 deletions analyse_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,18 @@
import json
import pandas as pd
import sys
import argparse


# print('Number of arguments:', len(sys.argv), 'arguments.')
# print('Argument List:', str(sys.argv))


if len(sys.argv) > 1:
input_str_path = sys.argv[1]
else:
# (1) Give a path of Experiments folder
input_str_path = 'Experiments/'

# (2) Get all subfolders
sub_folder_str_paths = os.listdir(input_str_path)

results = dict()

experiments = []
for path in sub_folder_str_paths:
try:
with open(input_str_path + path + '/configuration.json', 'r') as f:
config = json.load(f)
config = {i: config[i] for i in
['model', 'full_storage_path', 'embedding_dim',
'normalization', 'num_epochs', 'batch_size', 'lr',
'callbacks',
'scoring_technique',
'path_dataset_folder', 'p', 'q']}
except FileNotFoundError:
print('Exception occured at reading config')
continue

try:
with open(input_str_path + path + '/report.json', 'r') as f:
report = json.load(f)
report = {i: report[i] for i in ['Runtime','NumParam']}
except FileNotFoundError:
print('Exception occured at reading report')
continue

try:
with open(input_str_path + path + '/eval_report.json', 'r') as f:
eval_report = json.load(f)
# print(eval_report)
# exit(1)
# eval_report = {i: str(eval_report[i]) for i in ['Train', 'Val', 'Test']}
except FileNotFoundError:
print('Exception occured at reading eval_report')
continue

config.update(eval_report)
config.update(report)
experiments.append(config)
def get_default_arguments(description=None):
    """Parse command-line arguments for the experiment-analysis script.

    Parameters
    ----------
    description : list of str, optional
        Despite its name, this is an argv-style list of argument strings
        (e.g. ``["--dir", "Experiments/"]``) forwarded to
        ``ArgumentParser.parse_args``.  When ``None`` (the default),
        arguments are read from ``sys.argv`` as usual.

    Returns
    -------
    argparse.Namespace
        Parsed arguments with the attribute ``dir``.
    """
    parser = argparse.ArgumentParser(add_help=False)
    # Folder containing one sub-folder per experiment run, each holding
    # configuration.json / report.json / eval_report.json.
    parser.add_argument("--dir", type=str, default="KINSHIP-DistMult-RN/",
                        help="Path to a folder containing experiment sub-folders.")
    if description is None:
        return parser.parse_args()
    return parser.parse_args(description)


# need a class to hold all params
Expand All @@ -63,7 +22,7 @@ def __init__(self):
self.model_name = []
self.callbacks = []
self.embedding_dim = []
self.num_params=[]
self.num_params = []
self.num_epochs = []
self.batch_size = []
self.lr = []
Expand All @@ -87,6 +46,7 @@ def __init__(self):

self.runtime = []
self.normalization = []
self.scoring_technique = []

def save_experiment(self, x):
self.model_name.append(x['model'])
Expand All @@ -100,6 +60,7 @@ def save_experiment(self, x):
self.num_params.append(x['NumParam'])

self.normalization.append(x['normalization'])
self.scoring_technique.append(x['scoring_technique'])
self.callbacks.append(x['callbacks'])

self.train_mrr.append(x['Train']['MRR'])
Expand All @@ -122,32 +83,62 @@ def save_experiment(self, x):

def to_df(self):
return pd.DataFrame(
dict(model_name=self.model_name, #pq=self.pq, path_dataset_folder=self.path_dataset_folder,
dict(model_name=self.model_name, # pq=self.pq, path_dataset_folder=self.path_dataset_folder,
train_mrr=self.train_mrr, train_h1=self.train_h1,
train_h3=self.train_h3, train_h10=self.train_h10,
#full_storage_path=self.full_storage_path,
# full_storage_path=self.full_storage_path,
val_mrr=self.val_mrr, val_h1=self.val_h1,
val_h3=self.val_h3, val_h10=self.val_h10,
test_mrr=self.test_mrr, test_h1=self.test_h1,
test_h3=self.test_h3, test_h10=self.test_h10,
runtime=self.runtime,
params=self.num_params,
callbacks=self.callbacks,
#normalization=self.normalization,
#embeddingdim=self.embedding_dim
# normalization=self.normalization,
# embeddingdim=self.embedding_dim
scoring_technique=self.scoring_technique
)
)


counter = Experiment()
def analyse(args):
    """Aggregate per-experiment JSON reports found under ``args.dir``.

    Each sub-folder of ``args.dir`` is expected to contain
    ``configuration.json``, ``report.json`` and ``eval_report.json``.
    Selected fields from the three files are merged into one record per
    experiment, collected via ``Experiment``, printed as a LaTeX table
    sorted by test MRR, and written to ``<args.dir>/summary.csv``.

    Raises
    ------
    FileNotFoundError / KeyError
        If a sub-folder is missing one of the JSON files or an expected key.
    """
    # (1) Each sub-folder corresponds to a single experiment run.
    sub_folder_str_paths = os.listdir(args.dir)
    experiments = []
    for path in sub_folder_str_paths:
        full_path = args.dir + "/" + path
        # (2) Keep only the configuration fields consumed by
        # Experiment.save_experiment (deduplicated 'scoring_technique').
        with open(f'{full_path}/configuration.json', 'r') as f:
            config = json.load(f)
            config = {i: config[i] for i in
                      ['model', 'full_storage_path', 'embedding_dim',
                       'normalization', 'num_epochs', 'batch_size', 'lr',
                       'callbacks', 'scoring_technique',
                       'path_dataset_folder', 'p', 'q']}
        with open(f'{full_path}/report.json', 'r') as f:
            report = json.load(f)
            report = {i: report[i] for i in ['Runtime', 'NumParam']}
        with open(f'{full_path}/eval_report.json', 'r') as f:
            eval_report = json.load(f)
        # (3) Merge the three sources into a single flat record.
        config.update(eval_report)
        config.update(report)
        experiments.append(config)

    # (4) Collect records exactly once each (the duplicated loop in the
    # merged diff would have double-counted every experiment).
    counter = Experiment()
    for i in experiments:
        counter.save_experiment(i)

    # (5) Report: LaTeX table to stdout, CSV summary next to the inputs.
    df = counter.to_df()
    df.sort_values(by=['test_mrr'], ascending=False, inplace=True)
    pd.set_option("display.precision", 3)
    print(df.to_latex(index=False, float_format="%.3f"))
    df.to_csv(path_or_buf=args.dir + '/summary.csv')

df = counter.to_df()
pd.set_option("display.precision", 3)
#print(df)
print(df.to_latex(index=False,float_format="%.3f"))

print(df.to_markdown(index=False))
# Script entry point: parse CLI arguments and summarise experiment results.
if __name__ == '__main__':
    analyse(get_default_arguments())
Loading

0 comments on commit 66ff1cc

Please sign in to comment.