Update with codes

lhcavalcanti · Dec 3, 2020 · 693b759 · 693b759
1 parent 7212cfd
commit 693b759
Show file tree

Hide file tree

Showing 47 changed files with 20,680 additions and 2 deletions.
diff --git a/Q-learning b/Q-learning
diff --git a/q-learning/README.md b/q-learning/README.md
@@ -0,0 +1,44 @@
+# Q-learning
+Adaptive Cognitive Agents Exercise - Q-learning
+
+![alt text](./env.png)
+
+- Parameters:
+```
+Alpha = 0.5
+Gamma = 0.8
+Actions Order = [UP, DW, LF, RG]
+```
+- Rewards:
+```
+Rewards of -1.
+- Except for terminal state, which is +10.
+- If colide with walls reward is -10.
+```
+- Episodes:
+```
+Episode 1
+Initial State: 0
+Actions: Up, Up, Up, Right
+```
+```
+Episode 2
+Initial State: 4
+Actions: Right, Right, Left, Up
+```
+#### Final Values:
+```
+ [[-5.   0.   0.   5. ] | [ 0.   0.   0.   0. ]]
+
+ [[-0.5  0.  -5.  -0.5] | [ 5.   0.  -0.5  0. ]]
+
+ [[-0.5  0.   0.   0. ] | [ 0.   0.   0.   0. ]]
+```
+
+
+#### Final Policy:
+```
+ ['RG', '+10'] 
+ ['DW', 'UP'] 
+ ['DW', 'UP']
+```
diff --git a/q-learning/env.png b/q-learning/env.png
diff --git a/q-learning/main.py b/q-learning/main.py
@@ -0,0 +1,109 @@
+import numpy as np
+
+# ========== DEFINITIONS ============
+# epsilon = 0.001 # Convergence
+alpha = 0.5
+gamma = 0.8
+
+value = np.zeros((6, 4))
+
+rewards = np.full((6, ), -1)
+rewards[5] = 10
+terminal_state = 5
+
+possible_actions = ["U", "D", "L", "R"]
+
+# ==========   EPISODES   ===========
+initial_state = [0, 4]
+paths = [["U", "U", "U", "R"], ["L", "L", "R", "U"]]
+# ===================================
+
+def update_value(value, state, action):
+    if state == terminal_state:
+        return value, state
+
+    s = state
+    a = possible_actions.index(action)
+
+    next_s = get_next_state(s, a)
+
+    if next_s != s:
+        rw = rewards[next_s]
+    else:
+        rw = -10
+
+    value[s][a] += alpha * (rw + gamma * ( np.max([value[next_s][i] for i in range(4)]) - value[s, a] ) )
+
+
+    print("Next State: ", next_s)
+    np.set_printoptions(precision=3)
+    print(print_value(value), "\n")
+    return value, next_s
+
+def get_next_state(s, a):
+    next_s = s
+
+    if a == 0:
+        if s != 2 and s != 5:
+            next_s = s + 1
+    if a == 1:
+        if s != 0 and s != 3:
+            next_s = s - 1
+    if a == 2:
+        if s != 0 and s != 1 and s!= 2:
+            next_s = s - 3
+    if a == 3:
+        if s != 3 and s != 4 and s!= 5:
+            next_s = s + 3
+
+    return next_s
+
+
+def return_policy(value):   
+    policy = []
+
+    # obs.: somente 9 estados porque n�o ha a��o aplicada nos dois estados terminais
+    for s in range(6):
+        action = np.argmax([value[s][i] for i in range(4)])
+        policy.append(action)
+
+    actions = ["UP","DW","LF","RG"]
+
+    s1 = [actions[policy[2]], "+10"]
+    s2 = [actions[policy[1]],actions[policy[4]]]
+    s3 = [actions[policy[0]],actions[policy[3]]]
+
+    print("\n",s1,"\n",s2,"\n",s3, "\n")
+
+    return(policy)
+
+def print_value(value):
+    aux = np.zeros((3, 2, 4))
+    aux[0,] = np.array((value[2],value[5]))
+    aux[1,] = np.array((value[1],value[4]))
+    aux[2,] = np.array((value[0],value[3]))
+    return aux
+
+if __name__ == "__main__":
+    episode = 0
+
+    for path in paths:
+        state = initial_state[episode]
+        print("===== Episode: ", episode, " - Initial State: ", state, " ======")
+        for action in path:
+            old_value = value.copy()
+            value, state = update_value(value, state, action)
+
+            if state == terminal_state:
+                print("Terminal State!")
+                break
+
+            # diff = np.sum(value - old_value)
+            # if abs(diff) < epsilon:
+            #     print("Converged!")
+            #     break
+
+        print("Updated values: \n", print_value(value), "\n---\n")
+        episode += 1
+
+    policy = return_policy(value)
diff --git a/salesman-problem/.gitignore b/salesman-problem/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/salesman-problem/README.md b/salesman-problem/README.md
@@ -0,0 +1,3 @@
+# traveling-salesman
+
+Activity of Adaptative Cognitives Agents Class
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# traveling-salesman

		Activity of Adaptative Cognitives Agents Class