Merge pull request #365 from cglewis/master

remove unused code, set public sessions
faucetsdn · Apr 16, 2019 · a8a70cb
2 parents 07d5481 + 5db377e
commit a8a70cb
Show file tree

Hide file tree

Showing 5 changed files with 5 additions and 96 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -9,7 +9,7 @@ python:
 install:
   - pip install codecov
 env:
-  - PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/bin:$PYTHONPATH PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/networkml:$PYTHONPATH REDIS_HOST=localhost
+  - PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/bin:$PYTHONPATH PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/networkml:$PYTHONPATH REDIS_HOST=localhost POSEIDON_PUBLIC_SESSIONS=1
 jobs:
   include:
   - stage: test

diff --git a/networkml/parsers/pcap/pcap_utils.py b/networkml/parsers/pcap/pcap_utils.py
@@ -335,30 +335,6 @@ def clean_dict(sessions, source_address):
     return cleaned_sessions, source_address
 
 
-def create_inputs(labels, session, seq_len, num_chars=16):
-    '''
-    Creates model inputs from a set of labels session
-    '''
-    L = np.zeros((1, len(labels)))
-    X = np.zeros((1, len(session), seq_len, num_chars))
-
-    # Create the packet input
-    hex_str = '0123456789abcdef'
-    for i, _ in enumerate(session):
-        raw_hex = session[i][1]
-        for j, c in enumerate(raw_hex):
-            if j < seq_len:
-                char_id = hex_str.index(c)
-                X[0, i, j, char_id] = 1
-
-    # Create the label input
-    classification = sorted(labels, key=lambda x: x[0])
-    class_array = [p for c, p in classification]
-    L[0] = np.asarray(class_array)
-
-    return X, L
-
-
 def get_length(packet):
     """
     Gets the total length of the packet

diff --git a/networkml/parsers/pcap/session_iterator.py b/networkml/parsers/pcap/session_iterator.py
@@ -64,8 +64,6 @@ def __init__(
         self.train_length = self.X_train.shape[0]
         self.validation_length = self.X_vala.shape[0]
         self.test_length = self.X_test.shape[0]
-        # self._normalize()
-
         self.perturb_types = perturb_types
 
     def _load_data(self):
@@ -163,18 +161,6 @@ def _vectorize(self, session):
 
         return X, y
 
-    def _normalize(self):
-        means = np.mean(self.X_train, axis=(0, 1))
-        stds = np.std(self.X_train, axis=(0, 1))
-
-        means[0:5] = 0
-        means[11:] = 0
-        stds[0:5] = 1
-        stds[11:] = 1
-
-        self.means = means
-        self.stds = stds
-
     def _swap_ports(self, X):
         '''
         Swaps ports in a single session in the sequence

diff --git a/networkml/utils/training_utils.py b/networkml/utils/training_utils.py
@@ -7,7 +7,6 @@
 import sys
 
 import numpy as np
-from sklearn.decomposition import PCA
 from sklearn.ensemble import ExtraTreesClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_score
@@ -213,59 +212,3 @@ def select_features(X, y):
 
     return [i for i, score in enumerate(selection_forest.feature_importances_)
             if score > threshold]
-
-
-def whiten_features(X):
-    '''
-    Fits the whitening transformation for the features X. and returns the
-    associated matrix.
-
-    Args:
-        X: numpy 2D array containing features
-
-    Returns:
-        whitening_transformation: Transformation to whiten features
-    '''
-
-    # Use PCA to create a whitening transformation fit to the training set
-    whitening_transformation = PCA(whiten=False)
-    whitening_transformation.fit(X)
-
-    return whitening_transformation
-
-
-def choose_regularization(X, y):
-    '''
-    Chooses a value for the regularization parameter using grid search and
-    cross validation.
-
-    Args:
-        X: numpy 2D array of model inputs
-        y: numpy 1D array of labels
-
-    Returns:
-        C: Selected value of the regulatization coefficient
-    '''
-
-    # Set up the grid search
-    max_C, step_size = 10, 5
-    best_score, C = 0, 0
-    trial_Cs = [i/step_size for i in range(1, max_C*step_size + 1)]
-
-    # Grid search with cross validation to get C
-    for trial in trial_Cs:
-        model = LogisticRegression(
-            C=trial,
-            multi_class='multinomial',
-            solver='newton-cg',
-            class_weight='balanced',
-            random_state=0,
-            max_iter=1000
-        )
-        scores = cross_val_score(model, X, y, cv=10)
-        score = scores.mean()
-        if score > best_score:
-            best_score = score,
-            C = trial
-
-    return C
diff --git a/tests/test_networkml.py b/tests/test_networkml.py
@@ -1,3 +1,4 @@
+import os
 import sys
 
 import pytest
@@ -17,6 +18,7 @@ def test_networkml_eval_onelayer():
 
 def test_networkml_eval_randomforest():
     sys.argv = ['bin/networkml', '-p', 'tests/', '-a', 'randomforest']
+    os.environ['POSEIDON_PUBLIC_SESSIONS'] = ''
     netml = NetworkML()
 
 
@@ -36,6 +38,7 @@ def test_networkml_train_onelayer():
 def test_networkml_train_randomforest():
     sys.argv = ['bin/networkml', '-p', 'tests/',
                 '-o', 'train', '-a', 'randomforest', '-m', 'networkml/trained_models/randomforest/RandomForestModel.pkl']
+    os.environ['POSEIDON_PUBLIC_SESSIONS'] = ''
     with pytest.raises(SystemExit) as pytest_wrapped_e:
         netml = NetworkML()
     assert pytest_wrapped_e.type == SystemExit
@@ -59,4 +62,5 @@ def test_networkml_test_onelayer():
 def test_networkml_test_randomforest():
     sys.argv = ['bin/networkml', '-p', 'tests/',
                 '-o', 'test', '-a', 'randomforest', '-m', 'networkml/trained_models/randomforest/RandomForestModel.pkl']
+    os.environ['POSEIDON_PUBLIC_SESSIONS'] = ''
     netml = NetworkML()