
Commit

Conversion to python 3

anandtrex committed Oct 12, 2017
1 parent 2b951e1 commit 71d934d

Showing 105 changed files with 1,245 additions and 922 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -15,3 +15,4 @@ Config.py
 .nfs*
 dist
 *.egg-info
+*.bak
5 changes: 3 additions & 2 deletions examples/bicycle/kifdd_triangle.py
@@ -1,3 +1,5 @@
+from __future__ import division
+from past.utils import old_div
 import rlpy
 import numpy as np
 from hyperopt import hp
@@ -34,8 +36,7 @@ def make_experiment(
 
     domain = rlpy.Domains.BicycleRiding()
     opt["domain"] = domain
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
     representation = rlpy.Representations.KernelizediFDD(domain, sparsify=sparsify,
                                                          kernel=rlpy.Representations.linf_triangle_kernel,
                                                          kernel_args=[kernel_width],
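A note on the recurring old_div rewrite: under Python 2, / floor-divides two integers, whereas from __future__ import division gives / true-division semantics everywhere. The futurize tool conservatively replaces / with past.utils.old_div wherever it cannot prove the operands are non-integral, preserving the Python 2 result. A minimal sketch of the helper's documented behavior (illustrative; old_div_sketch is a stand-in name, not the packaged function):

    import numbers

    def old_div_sketch(a, b):
        # Floor-divide when both operands are integral (Python 2 "/"),
        # true-divide otherwise.
        if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral):
            return a // b
        return a / b

    assert old_div_sketch(7, 2) == 3      # int / int floors, as in Python 2
    assert old_div_sketch(7.0, 2) == 3.5  # any float operand: true division

In the kernel_width lines above, statespace_limits is a float NumPy array, so old_div reduces to ordinary true division and the computed widths are unchanged.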
6 changes: 4 additions & 2 deletions examples/cartpole2d/kifdd_gauss.py
@@ -1,7 +1,9 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
 
+from past.utils import old_div
 from rlpy.Domains import InfCartPoleBalance
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -42,10 +44,10 @@ def make_experiment(
 
     domain = InfCartPoleBalance()
     opt["domain"] = domain
-    kernel_width = (
+    kernel_width = old_div((
         domain.statespace_limits[:,
                                  1] - domain.statespace_limits[:,
-                                                               0]) / kernel_resolution
+                                                               0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=gaussian_kernel,
                                     kernel_args=[kernel_width],
6 changes: 4 additions & 2 deletions examples/cartpole2d/kifdd_triangle.py
@@ -1,7 +1,9 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
 
+from past.utils import old_div
 from rlpy.Domains import InfCartPoleBalance
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -43,10 +45,10 @@ def make_experiment(
 
     domain = InfCartPoleBalance()
     opt["domain"] = domain
-    kernel_width = (
+    kernel_width = old_div((
         domain.statespace_limits[:,
                                  1] - domain.statespace_limits[:,
-                                                               0]) / kernel_resolution
+                                                               0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=linf_triangle_kernel,
                                     kernel_args=[kernel_width],
5 changes: 3 additions & 2 deletions examples/cartpole_modern/kifdd.py
@@ -1,6 +1,8 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
+from past.utils import old_div
 from rlpy.Domains.FiniteTrackCartPole import FiniteCartPoleBalanceOriginal, FiniteCartPoleBalanceModern
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -39,8 +41,7 @@ def make_experiment(
 
     domain = FiniteCartPoleBalanceModern()
     opt["domain"] = domain
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
 
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=gaussian_kernel,
5 changes: 3 additions & 2 deletions examples/cartpole_orig/kifdd_gauss.py
@@ -1,6 +1,8 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
+from past.utils import old_div
 from rlpy.Domains.FiniteTrackCartPole import FiniteCartPoleBalanceOriginal, FiniteCartPoleBalanceModern
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -43,8 +45,7 @@ def make_experiment(
     domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
     opt["domain"] = domain
     # domain = FiniteCartPoleBalanceModern()
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=gaussian_kernel,
                                     kernel_args=[kernel_width],
5 changes: 3 additions & 2 deletions examples/cartpole_orig/kifdd_triangle.py
@@ -1,6 +1,8 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
+from past.utils import old_div
 from rlpy.Domains.FiniteTrackCartPole import FiniteCartPoleBalanceOriginal, FiniteCartPoleBalanceModern
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -43,8 +45,7 @@ def make_experiment(
     domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
     opt["domain"] = domain
     # domain = FiniteCartPoleBalanceModern()
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=linf_triangle_kernel,
                                     kernel_args=[kernel_width],
5 changes: 3 additions & 2 deletions examples/heli/kifdd.py
@@ -1,3 +1,5 @@
+from __future__ import division
+from past.utils import old_div
 from rlpy.Domains import HelicopterHover
 from rlpy.Agents import Q_Learning
 from rlpy.Representations import *
@@ -40,8 +42,7 @@ def make_experiment(
     domain = HelicopterHover()
     opt["domain"] = domain
     # domain = FiniteCartPoleBalanceModern()
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=linf_triangle_kernel,
                                     kernel_args=[kernel_width],
5 changes: 3 additions & 2 deletions examples/hiv/kifdd.py
@@ -1,3 +1,5 @@
+from __future__ import division
+from past.utils import old_div
 from rlpy.Domains.HIVTreatment import HIVTreatment
 from rlpy.Agents import Q_Learning
 from rlpy.Representations import *
@@ -40,8 +42,7 @@ def make_experiment(
     domain = HIVTreatment()
     opt["domain"] = domain
     # domain = FiniteCartPoleBalanceModern()
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=gaussian_kernel,
                                     kernel_args=[kernel_width],
5 changes: 3 additions & 2 deletions examples/hiv/kifdd_triangle.py
@@ -1,3 +1,5 @@
+from __future__ import division
+from past.utils import old_div
 from rlpy.Domains.HIVTreatment import HIVTreatment
 from rlpy.Agents import Q_Learning
 from rlpy.Representations import *
@@ -39,8 +41,7 @@ def make_experiment(
 
     domain = HIVTreatment()
     opt["domain"] = domain
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=linf_triangle_kernel,
                                     kernel_args=[kernel_width],
6 changes: 4 additions & 2 deletions examples/puddleworld/kifdd_gauss.py
@@ -1,7 +1,9 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
 
+from past.utils import old_div
 from rlpy.Domains import PuddleWorld
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -43,10 +45,10 @@ def make_experiment(
 
     domain = PuddleWorld()
     opt["domain"] = domain
-    kernel_width = (
+    kernel_width = old_div((
         domain.statespace_limits[:,
                                  1] - domain.statespace_limits[:,
-                                                               0]) / kernel_resolution
+                                                               0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=gaussian_kernel,
                                     kernel_args=[kernel_width],
6 changes: 4 additions & 2 deletions examples/puddleworld/kifdd_gauss_gap.py
@@ -1,7 +1,9 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
 
+from past.utils import old_div
 from rlpy.Domains.PuddleWorld import PuddleGapWorld
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -43,10 +45,10 @@ def make_experiment(
 
     domain = PuddleGapWorld()
     opt["domain"] = domain
-    kernel_width = (
+    kernel_width = old_div((
         domain.statespace_limits[:,
                                  1] - domain.statespace_limits[:,
-                                                               0]) / kernel_resolution
+                                                               0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=gaussian_kernel,
                                     kernel_args=[kernel_width],
6 changes: 4 additions & 2 deletions examples/puddleworld/kifdd_triangle.py
@@ -1,7 +1,9 @@
 """
 Cart-pole balancing with continuous / Kernelized iFDD
 """
+from __future__ import division
 
+from past.utils import old_div
 from rlpy.Domains import PuddleWorld
 from rlpy.Agents import SARSA, Q_LEARNING
 from rlpy.Representations import *
@@ -42,10 +44,10 @@ def make_experiment(
 
     domain = PuddleWorld()
     opt["domain"] = domain
-    kernel_width = (
+    kernel_width = old_div((
         domain.statespace_limits[:,
                                  1] - domain.statespace_limits[:,
-                                                               0]) / kernel_resolution
+                                                               0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=linf_triangle_kernel,
                                     kernel_args=[kernel_width],
7 changes: 5 additions & 2 deletions examples/swimmer/kifdd_triangle.py
@@ -1,3 +1,7 @@
+from __future__ import division
+from builtins import str
+from builtins import range
+from past.utils import old_div
 from rlpy.Domains import Swimmer
 from rlpy.Agents import Q_Learning, SARSA
 from rlpy.Representations import *
@@ -40,8 +44,7 @@ def make_experiment(
 
     domain = Swimmer()
     opt["domain"] = domain
-    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
-        / kernel_resolution
+    kernel_width = old_div((domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]), kernel_resolution)
     representation = KernelizediFDD(domain, sparsify=sparsify,
                                     kernel=gaussian_kernel,
                                     kernel_args=[kernel_width],
5 changes: 3 additions & 2 deletions examples/tutorial/run_infTrackCartPole_batch.py
@@ -1,6 +1,7 @@
+from builtins import range
 from rlpy.Tools.run import run
 run("examples/tutorial/infTrackCartPole_rbfs.py", "./Results/Tutorial/InfTrackCartPole/RBFs",
-    ids=range(10), parallelization="joblib")
+    ids=list(range(10)), parallelization="joblib")
 
 run("examples/tutorial/infTrackCartPole_tabular.py", "./Results/Tutorial/InfTrackCartPole/Tabular",
-    ids=range(10), parallelization="joblib")
+    ids=list(range(10)), parallelization="joblib")
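The ids=list(range(10)) change is needed because range in Python 3 (and the builtins.range backport imported above) returns a lazy sequence object rather than a list, so any caller that needs a real list, for example to pickle, slice-assign, or mutate it, must materialize it explicitly. A short illustration using only the standard library (whether rlpy's run() strictly requires a list here is an assumption; futurize wraps such call sites defensively):

    ids = range(10)
    print(ids)        # Python 3 prints: range(0, 10) -- a lazy object
    print(list(ids))  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -- materialized

    # Lazy ranges still support len(), indexing, and membership tests,
    # so list() only matters where an actual list is required.
    assert len(range(10)) == 10
    assert range(10)[3] == 3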
3 changes: 2 additions & 1 deletion examples/tutorial/run_parametersearch.py
@@ -1,7 +1,8 @@
+from __future__ import print_function
 from rlpy.Tools.hypersearch import find_hyperparameters
 best, trials = find_hyperparameters(
     "examples/tutorial/infTrackCartPole_rbfs.py",
     "./Results/Tutorial/InfTrackCartPole/RBFs_hypersearch",
     max_evals=10, parallelization="joblib",
     trials_per_point=5)
-print best
+print(best)
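print best is Python 2 statement syntax and a SyntaxError on Python 3; the added from __future__ import print_function makes print a function on Python 2 as well, so the parenthesized call runs identically on both interpreters. A minimal demonstration (the dict value is hypothetical, for illustration only):

    from __future__ import print_function  # no-op on Python 3; enables the function form on Python 2

    best = {"initial_learn_rate": 0.1}  # hypothetical hyperparameter result
    print(best)                         # same output on 2.7 and 3.x
    print("best:", best, sep=" ")       # keyword arguments exist only in the function form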
10 changes: 6 additions & 4 deletions rlpy/Agents/Agent.py
@@ -1,8 +1,12 @@
 """Standard Control Agent. """
+from __future__ import division
 
+from past.utils import old_div
+from builtins import object
 from abc import ABCMeta, abstractmethod
 import numpy as np
 import logging
+from future.utils import with_metaclass
 
 __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
 __credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
@@ -11,7 +15,7 @@
 __author__ = "Alborz Geramifard"
 
 
-class Agent(object):
+class Agent(with_metaclass(ABCMeta, object)):
 
     """Learning Agent for obtaining good policices.
@@ -43,8 +47,6 @@ class Agent(object):
     All new agent implementations should inherit from this class.
     """
 
-    __metaclass__ = ABCMeta
-
     # The Representation to be used by the Agent
     representation = None
     #: discount factor determining the optimal policy
@@ -213,7 +215,7 @@ def updateLearnRate(self, phi, phi_prime, eligibility_trace,
             candid_learn_rate = np.dot(discount_factor * phi_prime - phi,
                                        eligibility_trace)
             if candid_learn_rate < 0:
-                self.learn_rate = np.minimum(self.learn_rate,-1.0/candid_learn_rate)
+                self.learn_rate = np.minimum(self.learn_rate,old_div(-1.0,candid_learn_rate))
         elif self.learn_rate_decay_mode == 'boyan':
             self.learn_rate = self.initial_learn_rate * \
                 (self.boyan_N0 + 1.) / \
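The Agent change handles the one place where Python 2 and 3 class syntax is mutually exclusive: Python 2 reads a __metaclass__ class attribute (silently ignored by Python 3), while Python 3 takes a metaclass= keyword in the class header (a syntax error in Python 2). future.utils.with_metaclass(ABCMeta, object) builds a temporary base class so a single spelling works under both. A simplified sketch of the idea (with_metaclass_sketch is a stand-in; the shipped helper additionally keeps the temporary base out of the final MRO):

    from abc import ABCMeta, abstractmethod

    def with_metaclass_sketch(meta, *bases):
        # Return a throwaway base built by `meta`; any subclass of it
        # then gets `meta` as its metaclass.
        return meta("_TemporaryBase", bases, {})

    class AgentSketch(with_metaclass_sketch(ABCMeta, object)):
        @abstractmethod
        def learn(self, s, a, r, ns, terminal):
            raise NotImplementedError

    try:
        AgentSketch()  # abstract classes cannot be instantiated
    except TypeError as exc:
        print(exc)     # "Can't instantiate abstract class AgentSketch ..."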
4 changes: 3 additions & 1 deletion rlpy/Agents/Greedy_GQ.py
@@ -1,4 +1,6 @@
 """Greedy-GQ(lambda) learning agent"""
+from __future__ import division
+from past.utils import old_div
 from .Agent import Agent, DescentAlgorithm
 from rlpy.Tools import addNewElementForAllActions, count_nonzero
 import numpy as np
@@ -63,7 +65,7 @@ def learn(self, s, p_actions, a, r, ns, np_actions, na, terminal):
                                           phi_prime_s)
         nnz = count_nonzero(phi_s)  # Number of non-zero elements
 
-        expanded = (- len(self.GQWeight) + len(phi)) / self.representation.actions_num
+        expanded = old_div((- len(self.GQWeight) + len(phi)), self.representation.actions_num)
         if expanded:
             self._expand_vectors(expanded)
         # Set eligibility traces:
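Unlike the kernel_width conversions, this old_div call is load-bearing: both operands are Python ints, so under from __future__ import division a bare / would produce a float, and downstream code that sizes vectors by expanded would then misbehave. old_div restores the implicit floor division that Python 2 performed. A small demonstration with hypothetical feature counts:

    from __future__ import division

    weights_len, phi_len, actions_num = 12, 18, 3  # hypothetical sizes

    expanded_true = (-weights_len + phi_len) / actions_num   # 2.0 -- float under true division
    expanded_old = (-weights_len + phi_len) // actions_num   # 2   -- what old_div returns for ints

    assert expanded_true == 2.0 and isinstance(expanded_true, float)
    assert expanded_old == 2 and isinstance(expanded_old, int)
    # List-based padding, for instance, rejects the float:
    # [0] * expanded_true raises TypeError; [0] * expanded_old == [0, 0]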
3 changes: 2 additions & 1 deletion rlpy/Agents/LSPI.py
@@ -1,4 +1,5 @@
 """Least-Squares Policy Iteration [Lagoudakis and Parr 2003]."""
+from __future__ import print_function
 from .BatchAgent import BatchAgent
 import rlpy.Tools as Tools
 import numpy as np
@@ -133,7 +134,7 @@ def representationExpansionLSPI(self):
             added_feature = True
 
         if self.representation.features_num == 0:
-            print "No features, hence no LSPI is necessary!"
+            print("No features, hence no LSPI is necessary!")
             return
 
         self.logger.info(
4 changes: 3 additions & 1 deletion rlpy/Agents/NaturalActorCritic.py
@@ -1,6 +1,8 @@
 """
 Experimental Implementation of Natural Actor Critic
 """
+from __future__ import division
+from past.utils import old_div
 import numpy as np
 from .Agent import Agent
 from rlpy.Tools import solveLinear, regularize
@@ -23,7 +25,7 @@ class NaturalActorCritic(Agent):
     """
 
     # minimum for the cosine of the current and last gradient
-    min_cos = np.cos(np.pi / 180.)
+    min_cos = np.cos(old_div(np.pi, 180.))
 
     def __init__(self, policy, representation, discount_factor, forgetting_rate,
                  min_steps_between_updates, max_steps_between_updates, lambda_,
9 changes: 6 additions & 3 deletions rlpy/Agents/TDControlAgent.py
@@ -1,4 +1,7 @@
 """Control Agents based on TD Learning, i.e., Q-Learning and SARSA"""
+from __future__ import division
+from __future__ import print_function
+from past.utils import old_div
 from .Agent import Agent, DescentAlgorithm
 from rlpy.Tools import addNewElementForAllActions, count_nonzero
 import numpy as np
@@ -61,8 +64,8 @@ def learn(self, s, p_actions, a, r, ns, np_actions, na, terminal):
 
         # Set eligibility traces:
         if self.lambda_:
-            expanded = (- len(self.eligibility_trace) + len(phi)) / \
-                self.representation.actions_num
+            expanded = old_div((- len(self.eligibility_trace) + len(phi)), \
+                self.representation.actions_num)
             if expanded > 0:
                 # Correct the size of eligibility traces (pad with zeros for
                 # new features)
@@ -95,7 +98,7 @@ def learn(self, s, p_actions, a, r, ns, np_actions, na, terminal):
                 td_error * self.eligibility_trace
             if not np.all(np.isfinite(weight_vec)):
                 weight_vec = weight_vec_old
-                print "WARNING: TD-Learning diverged, weight_vec reached infinity!"
+                print("WARNING: TD-Learning diverged, weight_vec reached infinity!")
         # Discover features if the representation has the discover method
         expanded = self.representation.post_discover(
             s,
