Skip to content

Commit

Permalink
Bug fixes for input parameter limiting
Browse files (browse the repository at this point in the history)
  • Loading branch information
tjkessler committed Sep 2, 2018
1 parent 3a5c4d7 commit 9b8d0c8
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 22 deletions.
4 changes: 2 additions & 2 deletions ecnet/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/data_utils.py
# v.1.5
# v.1.5.1
# Developed in 2018 by Travis Kessler <[email protected]>
#
# Contains the "DataFrame" class, and functions for processing/importing/
Expand Down Expand Up @@ -190,7 +190,7 @@ def shuffle(self, *args, split):
'''

if 'l' and 'v' and 't' in args:
self.create_sets(split=split)
self.create_sets(random=True, split=split)

elif 'l' and 'v' in args:
lv_set = []
Expand Down
21 changes: 12 additions & 9 deletions ecnet/limit_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/limit_parameters.py
# v.1.5
# v.1.5.1
# Developed in 2018 by Travis Kessler <[email protected]>
#
# Contains the functions necessary for reducing the input dimensionality of a
Expand All @@ -20,7 +20,7 @@
import ecnet.error_utils


def limit_iterative_include(DataFrame, limit_num):
def limit_iterative_include(DataFrame, limit_num, print_feedback=True):
'''
Limits the dimensionality of input data found in supplied *DataFrame*
object to a dimensionality of *limit_num* using iterative inclusion
Expand Down Expand Up @@ -114,24 +114,27 @@ def limit_iterative_include(DataFrame, limit_num):
test_input_retained[idx].append(param[0])

retained_input_list.append(DataFrame.input_names[rmse_idx])
print(retained_input_list)
print(rmse_val)
print()
if print_feedback:
print(retained_input_list)
print(rmse_val)
print()

return retained_input_list


def limit_genetic(DataFrame, limit_num, population_size, num_survivors,
num_generations, shuffle=False, print_feedback=True):
num_generations, shuffle=False, data_split=[0.65, 0.25, 0.1],
print_feedback=True):
'''
Limits the dimensionality of input data found in supplied *DataFrame*
object to a dimensionality of *limit_num* using a genetic algorithm.
Optional arguments for *population_size* of genetic algorithm's population,
*num_survivors* for selecting the best performers from each population
generation to reproduce, *num_generations* for the number of times the
population will reproduce, *shuffle* for shuffling the data sets for each
population member, and *print_feedback* for printing the average fitness
score of the population after each generation.
population member, *data_split* to determine l/v/t splits if shuffling,
and *print_feedback* for printing the average fitness score of the
population after each generation.
'''

def ecnet_limit_inputs(feed_dict):
Expand All @@ -147,7 +150,7 @@ def ecnet_limit_inputs(feed_dict):
test_input = []

if shuffle:
DataFrame.shuffle('l', 'v', 't')
DataFrame.shuffle('l', 'v', 't', split=data_split)
packaged_data_cf = DataFrame.package_sets()
else:
packaged_data_cf = packaged_data
Expand Down
34 changes: 25 additions & 9 deletions ecnet/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/server.py
# v.1.5
# v.1.5.1
# Developed in 2018 by Travis Kessler <[email protected]>
#
# Contains the "Server" class, which handles ECNet project creation, neural
Expand Down Expand Up @@ -128,7 +128,7 @@ def import_data(self, data_filename, sort_type='random',
def limit_input_parameters(self, limit_num, output_filename,
use_genetic=False, population_size=500,
num_survivors=200, num_generations=25,
shuffle=False):
shuffle=False, data_split=[0.65, 0.25, 0.1]):
'''
Limits the input dimensionality of currently loaded DataFrame; default
method is an iterative inclusion algorithm, options for using a genetic
Expand All @@ -144,20 +144,36 @@ def limit_input_parameters(self, limit_num, output_filename,
*num_generations* - number of generations the algorithm will run for
*shuffle* - whether to shuffle learning, validation and
testing sets for each population member
*data_split* - if shuffling, learning/validation/testing data
is split using this argument
See https://github.com/tjkessler/pygenetics for genetic algorithm
source code.
'''

if use_genetic:
params = ecnet.limit_parameters.limit_genetic(
self.DataFrame, limit_num, population_size, num_survivors,
shuffle=shuffle, print_feedback=self.__print_feedback
)
try:
params = ecnet.limit_parameters.limit_genetic(
self.DataFrame, limit_num, population_size,
num_survivors, num_generations, shuffle=shuffle,
print_feedback=self.__print_feedback
)
except:
params = ecnet.limit_parameters.limit_genetic(
self.DataFrame, limit_num, population_size, num_survivors,
num_generations, shuffle=shuffle, data_split=data_split,
print_feedback=True
)
else:
params = ecnet.limit_parameters.limit_iterative_include(
self.DataFrame, limit_num
)
try:
params = ecnet.limit_parameters.limit_iterative_include(
self.DataFrame, limit_num,
print_feedback=self.__print_feedback
)
except:
params = ecnet.limit_parameters.limit_iterative_include(
self.DataFrame, limit_num, print_feedback=True
)
ecnet.limit_parameters.output(self.DataFrame, params, output_filename)

def tune_hyperparameters(self, target_score=None, iteration_amt=50,
Expand Down
4 changes: 2 additions & 2 deletions examples/limit_input_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
sv.import_data('my_data.csv')

# Limit the input dimensionality to 15, save to 'my_data_limited.csv'
sv.limit_parameters(15, 'my_data_limited.csv')
sv.limit_input_parameters(15, 'my_data_limited.csv')


# Use this line instead for limiting the input dimensionality using a genetic
# algorithm
sv.limit_parameters(15, 'my_data_limited_genetic.csv', use_genetic=True)
sv.limit_input_parameters(15, 'my_data_limited_genetic.csv', use_genetic=True)

0 comments on commit 9b8d0c8

Please sign in to comment.