Skip to content

Commit

Permalink
Merge pull request #365 from cglewis/master
Browse files Browse the repository at this point in the history
remove unused code, set public sessions
  • Loading branch information
cglewis authored Apr 16, 2019
2 parents 07d5481 + 5db377e commit a8a70cb
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ python:
install:
- pip install codecov
env:
- PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/bin:$PYTHONPATH PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/networkml:$PYTHONPATH REDIS_HOST=localhost
- PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/bin:$PYTHONPATH PYTHONPATH=/home/travis/build/$TRAVIS_REPO_SLUG/networkml:$PYTHONPATH REDIS_HOST=localhost POSEIDON_PUBLIC_SESSIONS=1
jobs:
include:
- stage: test
Expand Down
24 changes: 0 additions & 24 deletions networkml/parsers/pcap/pcap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,30 +335,6 @@ def clean_dict(sessions, source_address):
return cleaned_sessions, source_address


def create_inputs(labels, session, seq_len, num_chars=16):
    '''
    Creates model inputs from a set of labels and a session

    Args:
        labels: collection of (label, value) pairs
        session: sequence of packets; element [1] of each packet is the
                 packet's hex string (lowercase digits only)
        seq_len: number of leading hex characters encoded per packet
        num_chars: size of the one-hot axis (one slot per hex digit)
    Returns:
        X: array of shape (1, len(session), seq_len, num_chars) holding a
           one-hot encoding of the first seq_len hex characters of each
           packet
        L: array of shape (1, len(labels)) holding the label values,
           ordered by label
    Raises:
        ValueError: if one of the encoded characters is not in
                    '0123456789abcdef'
    '''
    L = np.zeros((1, len(labels)))
    X = np.zeros((1, len(session), seq_len, num_chars))

    # Create the packet input
    hex_str = '0123456789abcdef'
    for i, packet in enumerate(session):
        # Only the first seq_len characters are encoded, so slice the
        # payload instead of walking all of it and skipping the tail
        for j, c in enumerate(packet[1][:seq_len]):
            X[0, i, j, hex_str.index(c)] = 1

    # Create the label input, ordered by label so the layout is stable
    classification = sorted(labels, key=lambda x: x[0])
    L[0] = np.asarray([p for _, p in classification])

    return X, L


def get_length(packet):
"""
Gets the total length of the packet
Expand Down
14 changes: 0 additions & 14 deletions networkml/parsers/pcap/session_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ def __init__(
self.train_length = self.X_train.shape[0]
self.validation_length = self.X_vala.shape[0]
self.test_length = self.X_test.shape[0]
# self._normalize()

self.perturb_types = perturb_types

def _load_data(self):
Expand Down Expand Up @@ -163,18 +161,6 @@ def _vectorize(self, session):

return X, y

def _normalize(self):
    '''
    Computes the mean and standard deviation of X_train over its first
    two axes and stores them as self.means and self.stds. Entries 0-4
    and 11 onward are overwritten with mean 0 and std 1.
    '''
    means = np.mean(self.X_train, axis=(0, 1))
    stds = np.std(self.X_train, axis=(0, 1))

    # Index ranges whose statistics are replaced by the identity values
    for fixed in (slice(0, 5), slice(11, None)):
        means[fixed] = 0
        stds[fixed] = 1

    self.means, self.stds = means, stds

def _swap_ports(self, X):
'''
Swaps ports in a single session in the sequence
Expand Down
57 changes: 0 additions & 57 deletions networkml/utils/training_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import sys

import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
Expand Down Expand Up @@ -213,59 +212,3 @@ def select_features(X, y):

return [i for i, score in enumerate(selection_forest.feature_importances_)
if score > threshold]


def whiten_features(X):
    '''
    Fits a PCA transformation to the features X and returns the fitted
    transformer.
    Args:
        X: numpy 2D array containing features
    Returns:
        whitening_transformation: PCA object fit to X. Note that it is
        constructed with whiten=False.
    '''

    # fit() returns the fitted estimator, so build, fit and return in one go
    return PCA(whiten=False).fit(X)


def choose_regularization(X, y):
    '''
    Chooses a value for the regularization parameter using grid search and
    cross validation.
    Args:
        X: numpy 2D array of model inputs
        y: numpy 1D array of labels
    Returns:
        C: Selected value of the regularization coefficient. This is the
           smallest trial value that reaches the best mean cross-validation
           score, or 0 if no trial scores above 0.
    '''

    # Set up the grid search: trial values are 1/step_size, 2/step_size,
    # ... up to max_C
    max_C, step_size = 10, 5
    best_score, C = 0, 0
    trial_Cs = [i/step_size for i in range(1, max_C*step_size + 1)]

    # Grid search with cross validation to get C
    for trial in trial_Cs:
        model = LogisticRegression(
            C=trial,
            multi_class='multinomial',
            solver='newton-cg',
            class_weight='balanced',
            random_state=0,
            max_iter=1000
        )
        score = cross_val_score(model, X, y, cv=10).mean()
        if score > best_score:
            # best_score must stay a plain number: storing it as a tuple
            # (e.g. via a trailing comma) makes the comparison above raise
            # TypeError on the next trial
            best_score = score
            C = trial

    return C
4 changes: 4 additions & 0 deletions tests/test_networkml.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import sys

import pytest
Expand All @@ -17,6 +18,7 @@ def test_networkml_eval_onelayer():

def test_networkml_eval_randomforest():
    '''
    Constructs NetworkML with the randomforest algorithm pointed at tests/,
    with POSEIDON_PUBLIC_SESSIONS set to an empty string.
    '''
    os.environ['POSEIDON_PUBLIC_SESSIONS'] = ''
    cli_args = ['bin/networkml', '-p', 'tests/', '-a', 'randomforest']
    sys.argv = cli_args
    NetworkML()


Expand All @@ -36,6 +38,7 @@ def test_networkml_train_onelayer():
def test_networkml_train_randomforest():
sys.argv = ['bin/networkml', '-p', 'tests/',
'-o', 'train', '-a', 'randomforest', '-m', 'networkml/trained_models/randomforest/RandomForestModel.pkl']
os.environ['POSEIDON_PUBLIC_SESSIONS'] = ''
with pytest.raises(SystemExit) as pytest_wrapped_e:
netml = NetworkML()
assert pytest_wrapped_e.type == SystemExit
Expand All @@ -59,4 +62,5 @@ def test_networkml_test_onelayer():
def test_networkml_test_randomforest():
    '''
    Constructs NetworkML for the 'test' operation with the randomforest
    algorithm and the packaged RandomForestModel.pkl, with
    POSEIDON_PUBLIC_SESSIONS set to an empty string.
    '''
    os.environ['POSEIDON_PUBLIC_SESSIONS'] = ''
    model_path = 'networkml/trained_models/randomforest/RandomForestModel.pkl'
    sys.argv = [
        'bin/networkml', '-p', 'tests/',
        '-o', 'test', '-a', 'randomforest', '-m', model_path,
    ]
    NetworkML()

0 comments on commit a8a70cb

Please sign in to comment.