From 3fe6c12ddc7877639f86e0d5789d487aaddf7e67 Mon Sep 17 00:00:00 2001 From: stevo Date: Wed, 14 Jun 2023 13:13:33 +0200 Subject: [PATCH 1/3] initial updates --- requirements.txt | 45 ++++++++++++++++++++++----------------------- setup.cfg | 2 +- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/requirements.txt b/requirements.txt index 62bd293..a817339 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,24 +1,23 @@ # external requirements -Sphinx==2.4.3 -coverage==5.0.3 -awscli==1.18.13 -python-dotenv==0.5.1 -pandas==0.25.3 -h5py==2.10.0 -python-dateutil==2.8.0 -PyYAML==5.1.2 -keras==2.3.1 -keras-self-attention==0.42.0 -keras-tcn==3.0.1 -gevent==1.4.0 -requests==2.23.0 -numpy==1.18.1 -sympy==1.5.1 -statsmodels==0.11.1 -scikit-learn==0.22.1 -matplotlib==3.0.3 -seaborn==0.9.1 -joblib==0.14.1 -webargs<6.0.0,>=5.5.2 -arrow==0.15.5 -docutils==0.15.2 +Sphinx +coverage +awscli +python-dotenv +pandas +h5py +python-dateutil +PyYAML +keras-self-attention +keras-tcn +gevent +requests +numpy +sympy +statsmodels +scikit-learn +matplotlib +seaborn +joblib +webargs +arrow +docutils diff --git a/setup.cfg b/setup.cfg index 4c182bc..75015e4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ name = mods summary = Intelligent module using ML/DL techniques for underlying IDS and monitoring system description-file = README.md -version = 2.1.0 +version = 2.3.0 author = Giang Nguyen, Stefan Dlugolinsky author-email = giang.nguyen@savba.sk, stefan.dlugolinsky@savba.sk license = Apache-2 From 7db4bccc72a5d1a5c8d798dc0616e5f5da21db9b Mon Sep 17 00:00:00 2001 From: Stefan Dlugolinsky Date: Wed, 14 Jun 2023 13:08:14 +0000 Subject: [PATCH 2/3] update to Tensorflow 2.10.1, small fixes to read data from YYYY-MM.zip files --- mods/config.py | 2 +- mods/models/mods_model.py | 26 +++++++++++++------------- mods/utils.py | 15 ++++++++++++++- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/mods/config.py b/mods/config.py index 7f807f0..58656f3 100644 --- a/mods/config.py +++ b/mods/config.py @@ -85,7 +85,7 @@ def list_dir(dir, pattern='*.tsv'): app_data_remote = os.path.join(REMOTE_BASE_DIR, 'data') app_models_remote = os.path.join(REMOTE_BASE_DIR, 'models', EXPERIMENT_NAMESPACE) app_data = os.path.join(IN_OUT_BASE_DIR, 'data') -app_data_features = os.path.join(app_data, 'features') +app_data_features = os.path.join(app_data, 'datapools', 'mods') app_models = os.path.join(IN_OUT_BASE_DIR, 'models', EXPERIMENT_NAMESPACE) app_checkpoints = os.path.join(IN_OUT_BASE_DIR, 'checkpoints', EXPERIMENT_NAMESPACE) app_cache = os.path.join(IN_OUT_BASE_DIR, 'cache', EXPERIMENT_NAMESPACE) diff --git a/mods/models/mods_model.py b/mods/models/mods_model.py index 8a1a03d..30946ac 100644 --- a/mods/models/mods_model.py +++ b/mods/models/mods_model.py @@ -34,19 +34,19 @@ import keras import numpy as np import pandas as pd -from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint, TensorBoard -from keras.layers import Bidirectional -from keras.layers import Dense -from keras.layers import Flatten -from keras.layers import Input -from keras.layers import RepeatVector -from keras.layers.convolutional import Conv1D -from keras.layers.convolutional import MaxPooling1D -from keras.layers.recurrent import GRU -from keras.layers.recurrent import LSTM -from keras.models import Model -from keras.optimizers import Adam -from keras.preprocessing.sequence import TimeseriesGenerator +from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint, TensorBoard +from tensorflow.keras.layers import Bidirectional +from tensorflow.keras.layers import Dense +from tensorflow.keras.layers import Flatten +from tensorflow.keras.layers import Input +from tensorflow.keras.layers import RepeatVector +from tensorflow.keras.layers import Conv1D +from tensorflow.keras.layers import MaxPooling1D +from tensorflow.keras.layers import GRU +from tensorflow.keras.layers import LSTM +from tensorflow.keras.models import Model +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator from keras_self_attention import SeqSelfAttention from multiprocessing import Process from sklearn.preprocessing import MinMaxScaler diff --git a/mods/utils.py b/mods/utils.py index 17b78af..f452f1a 100644 --- a/mods/utils.py +++ b/mods/utils.py @@ -437,6 +437,12 @@ def datapool_read( ): protocols, merge_on_col = parse_data_specs(data_specs_str) + if isinstance(time_range, str): + logging.info('converting time_range ''%s'' to TimeRange' % time_range) + time_range = TimeRange.from_str(time_range) + logging.info('time_range converted to %s' % str(time_range)) + + # read dataset from cache cache_dir = None cache_key = None @@ -541,12 +547,16 @@ def datapool_read( if protocol not in df_protocol.keys(): df_protocol[protocol] = df else: - df_protocol[protocol] = df_protocol[protocol].append(df) + df_protocol[protocol] = pd.concat([df_protocol[protocol], df], axis=0) + logging.info('df_protocol[protocol].shape: %s' % str(df_protocol[protocol].shape)) for ds in protocols: protocol = ds['protocol'] + logging.info('protocol: %s' % protocol) # rename columns rename_rule = {x[0]: x[1] for x in ds['cols'] if len(x) == 2} + logging.info('rename_rule: %s' % str(rename_rule)) + logging.info('df_protocol.keys(): %s' % str(df_protocol.keys())) df_protocol[protocol] = df_protocol[protocol].rename(index=str, columns=rename_rule) # convert units: # from B to kB, MB, GB use _kB, MB, GB @@ -573,6 +583,9 @@ def datapool_read( df_main = df_main[keep_cols] dbg_df(df_main, 'debug', 'df_main', print=False, save=cfg.MODS_DEBUG_MODE) + logging.info('df_main.shape: %s' % str(df_main.shape)) + logging.info('df_main.columns: %s' % str(df_main.columns)) + # save dataset to cache if caching: assert cache_dir is not None From 1acf99701ad460fb265409bc10fe146f24230a22 Mon Sep 17 00:00:00 2001 From: Stefan Dlugolinsky Date: Wed, 14 Jun 2023 13:29:31 +0000 Subject: [PATCH 3/3] fixed requirements --- requirements.txt | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/requirements.txt b/requirements.txt index a817339..1331c75 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,23 +1,23 @@ # external requirements -Sphinx -coverage -awscli -python-dotenv -pandas -h5py -python-dateutil -PyYAML -keras-self-attention -keras-tcn -gevent -requests -numpy -sympy -statsmodels -scikit-learn -matplotlib -seaborn -joblib -webargs -arrow -docutils +Sphinx==5.3.0 +coverage==7.2.7 +awscli==1.27.153 +python-dotenv==1.0.0 +pandas==2.0.2 +h5py==3.7.0 +python-dateutil==2.8.2 +PyYAML==5.4.1 +keras-self-attention==0.51.0 +keras-tcn==3.5.0 +gevent==22.10.2 +requests==2.31.0 +numpy==1.23.4 +sympy==1.12 +statsmodels==0.14.0 +scikit-learn==1.2.2 +matplotlib==3.7.1 +seaborn==0.12.2 +joblib==1.2.0 +webargs==5.5.3 +arrow==1.2.3 +docutils==0.16