-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support for shuffling data sets during parameter limiting process, after each inclusion or after each generation (if using genetic algorithm)
- Loading branch information
Showing
2 changed files
with
37 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
# -*- coding: utf-8 -*- | ||
# | ||
# ecnet/limit_parameters.py | ||
# v.1.4.2 | ||
# v.1.4.3 | ||
# Developed in 2018 by Travis Kessler <[email protected]> | ||
# | ||
# This program contains the functions necessary for reducing the input dimensionality of a | ||
|
@@ -21,9 +21,10 @@ | |
|
||
''' | ||
Limits the dimensionality of input data found in supplied *DataFrame* object to a | ||
dimensionality of *limit_num* | ||
dimensionality of *limit_num* using iterative inclusion; optional argument of | ||
*shuffle* for shuffling the data sets after each inclusion | ||
''' | ||
def limit_iterative_include(DataFrame, limit_num): | ||
def limit_iterative_include(DataFrame, limit_num, shuffle = False): | ||
|
||
# List of retained input parameters | ||
retained_input_list = [] | ||
|
@@ -33,20 +34,25 @@ def limit_iterative_include(DataFrame, limit_num): | |
valid_input_retained = [] | ||
test_input_retained = [] | ||
|
||
# Obtain Numpy arrays for learning, validation, testing sets | ||
packaged_data = DataFrame.package_sets() | ||
|
||
# Until specified number of parameters *limit_num* are retained | ||
while len(retained_input_list) < limit_num: | ||
|
||
# List of RMSEs for currently retained inputs + new inputs to test | ||
retained_rmse_list = [] | ||
|
||
# For all input parameters to test | ||
for idx, param in enumerate(DataFrame.input_names): | ||
|
||
# If shuffling the data sets after each inclusion | ||
if shuffle: | ||
# Shuffle all sets | ||
DataFrame.shuffle('l', 'v', 't') | ||
# Obtain Numpy arrays for learning, validation, testing sets | ||
packaged_data = DataFrame.package_sets() | ||
|
||
# For all input parameters to test | ||
for idx, param in enumerate(DataFrame.input_names): | ||
|
||
# Obtain input parameter column for learning, validation and test sets | ||
learn_input_add = [[sublist[idx]] for sublist in packaged_data.learn_x] | ||
valid_input_add = [[sublist[idx]] for sublist in packaged_data.valid_x] | ||
|
@@ -130,10 +136,11 @@ def limit_iterative_include(DataFrame, limit_num): | |
dimensionality of *limit_num* using a genetic algorithm. Optional arguments for | ||
*population_size* of genetic algorithm's population, *num_survivors* for selecting | ||
the best performers from each population generation to reproduce, *num_generations* | ||
for the number of times the population will reproduce, and *print_feedback* for | ||
printing the average fitness score of the population after each generation. | ||
for the number of times the population will reproduce, *shuffle* for shuffling the | ||
data sets after each generation, and *print_feedback* for printing the average | ||
fitness score of the population after each generation. | ||
''' | ||
def limit_genetic(DataFrame, limit_num, population_size, num_survivors, num_generations, print_feedback = True): | ||
def limit_genetic(DataFrame, limit_num, population_size, num_survivors, num_generations, shuffle = False, print_feedback = True): | ||
|
||
''' | ||
Genetic algorithm cost function, supplied to the genetic algorithm; returns the RMSE | ||
|
@@ -148,7 +155,7 @@ def ecnet_limit_inputs(feed_dict): | |
test_input = [] | ||
|
||
# For the input parameters chosen by the genetic algorithm: | ||
for idx, param in enumerate(feed_dict): | ||
for param in feed_dict: | ||
|
||
# Grab the input parameter | ||
learn_input_add = [[sublist[feed_dict[param]]] for sublist in packaged_data.learn_x] | ||
|
@@ -214,6 +221,15 @@ def minimize_best_n(members, n): | |
|
||
# Run the genetic algorithm for *num_generations* generations | ||
for gen in range(num_generations): | ||
|
||
# If shuffling data sets between generations | ||
if shuffle: | ||
# Shuffle all sets | ||
DataFrame.shuffle('l', 'v', 't') | ||
# Obtain Numpy arrays for learning, validation, testing sets | ||
packaged_data = DataFrame.package_sets() | ||
|
||
# Next generation | ||
population.next_generation(num_survivors = num_survivors, mut_rate = 0) | ||
if print_feedback: | ||
print('Generation: ' + str(gen + 1) + ' - Population fitness: ' + str(sum(p.fitness_score for p in population.members) / len(population))) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
# -*- coding: utf-8 -*- | ||
# | ||
# ecnet/server.py | ||
# v.1.4.2 | ||
# v.1.4.3 | ||
# Developed in 2018 by Travis Kessler <[email protected]> | ||
# | ||
# This file contains the "Server" class, which handles ECNet project creation, | ||
|
@@ -130,10 +130,18 @@ def import_data(self, data_filename = None): | |
# Package sets for model hand-off | ||
self.packaged_data = self.DataFrame.package_sets() | ||
|
||
def limit_parameters(self, limit_num, output_filename, use_genetic = False, population_size = 500, num_survivors = 200, num_generations = 25): | ||
''' | ||
Limits the input dimensionality of the currently loaded DataFrame to a dimension of *limit_num*. | ||
Saves the resulting limited DataFrame to *output_filename*. Option to *shuffle* data sets between | ||
inclusions/after each generation if using genetic algorithm. *use_genetic* allows for using a | ||
genetic algorithm to limit the dimensionality (default to iterative inclusion), with arguments | ||
for genetic algorithm *population_size*, *num_survivors* of each generation, and the number of | ||
generations *num_generations* (PyGenetics package). | ||
''' | ||
def limit_parameters(self, limit_num, output_filename, use_genetic = False, population_size = 500, num_survivors = 200, num_generations = 25, shuffle = False): | ||
|
||
if use_genetic: | ||
params = ecnet.limit_parameters.limit_genetic(self.DataFrame, limit_num, population_size, num_survivors, num_generations, print_feedback = self.vars['project_print_feedback']) | ||
params = ecnet.limit_parameters.limit_genetic(self.DataFrame, limit_num, population_size, num_survivors, num_generations, shuffle = shuffle, print_feedback = self.vars['project_print_feedback']) | ||
else: | ||
params = ecnet.limit_parameters.limit_iterative_include(self.DataFrame, limit_num) | ||
ecnet.limit_parameters.output(self.DataFrame, params, output_filename) | ||
|