-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsarsa.cc
116 lines (90 loc) · 2.45 KB
/
sarsa.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/*
* Code by Chris Mansley
*/
#include <fstream>
#include <iterator>
#include <cmath>
#include "sarsa.hh"
/*
 * Build a SARSA planner. The domain, chopper and epsilon arguments are
 * forwarded unchanged to the Planner base class.
 */
SARSA::SARSA(Domain *d, Chopper *c, double epsilon)
    : Planner(d, c, epsilon)
{
    /* Step size that scales the correction term in the SARSA update */
    this->alpha = 0.9;
}
/*
*
*/
void SARSA::initialize(std::string filename)
{
/* Locally store gamma */
gamma = domain->getDiscountFactor();
stateDimension = domain->getStateDimension();
actionDimension = domain->getActionDimension();
/*Parse log file data */
parseData(filename);
}
/*
*
*/
void SARSA::parseData(std::string infile)
{
SARS *sars = new SARS(stateDimension, actionDimension);
SARS *nextSARS = new SARS(stateDimension, actionDimension);
double q, q_prime;
/* Open log file */
logfile.open(infile.c_str());
for(int i=0; i<300; i++) {
/* Always grab at least one */
logfile >> *sars;
while(!logfile.eof()) {
/* Parse log file */
logfile >> *nextSARS;
/* Don't process samples straddling a terminal */
if(!sars->terminal) {
/* Grab Q-values for next state and this state */
std::vector<int> sa = chopper->discretizeState(nextSARS->s);
int a = chopper->discretizeAction(nextSARS->a);
sa.push_back(a);
q_prime = Q[sa]; /* depends on map initializing to default of 0.0 */
sa = chopper->discretizeState(sars->s);
a = chopper->discretizeAction(sars->a);
sa.push_back(a);
q = Q[sa]; /* depends on map initializing to default of 0.0 */
/* SARSA rule */
Q[sa] = q + alpha*(sars->reward + gamma*q_prime - q);
}
/* Attempting deep copy may not work */
*sars = *nextSARS;
}
logfile.clear();
logfile.seekg(0);
}
/* Close log file */
logfile.close();
}
/*
 * Greedy action selection: discretize `s`, look up the stored Q-value of
 * every discrete action in that state (pairs absent from Q count as 0),
 * and return the continuous form of the highest-valued action. Ties go
 * to the lowest action index.
 */
Action SARSA::plan(State s)
{
    const int numActions = chopper->getNumDiscreteActions();

    /* Lookup key: the discretized state plus one trailing slot for the action */
    std::vector<int> key = chopper->discretizeState(s);
    key.push_back(0);

    /* Running arg-max; the strict '>' keeps the first maximum seen */
    int bestAction = 0;
    double bestValue = 0.0;
    for(int candidate = 0; candidate < numActions; ++candidate) {
        key.back() = candidate;

        /* Check with find() first so that unseen pairs are not inserted into Q */
        const double value = (Q.find(key) != Q.end()) ? Q[key] : 0;

        if(candidate == 0 || value > bestValue) {
            bestAction = candidate;
            bestValue = value;
        }
    }

    return chopper->continuousAction(bestAction);
}