A sketch of how Q-learning will be implemented:
def step(self):
    """Run one sense-decide-act-learn cycle.

    The reinforcement gathered during the cycle is accumulated in
    ``self.reinforcement``.  Before returning, the cycle's reinforcement,
    sensed state and chosen action index are stored as ``self.last_reinf``,
    ``self.last_state`` and ``self.last_action``, and the reinforcement is
    passed to ``self.strengthen``.
    """
    # Every cycle starts with a clean reinforcement tally.
    self.reinforcement = 0

    # Sense the surroundings and fetch the Q-values for what was sensed.
    current_state = self.sensory_state()
    current_q = self.get_Q(current_state)

    # Account for the cost of sensing, then for the cost of the step itself.
    self.reinforcement += self.sense_reinforce()
    self.reinforcement += self.step_cost

    # Choose an action, carry it out, and add whatever it returns to the tally.
    chosen = self.decide(current_q)
    self.reinforcement += self.index2action(chosen)()

    # Q learning: skipped while the world's step counter is still zero.
    # NOTE(review): this runs before the last_* attributes are overwritten
    # below, presumably so learn() can still read the previous cycle's
    # values -- confirm against learn().
    world_has_stepped = self.world.n_steps != 0
    if world_has_stepped:
        self.learn(current_state, current_q)

    # Remember this cycle's outcome.
    self.last_reinf, self.last_state, self.last_action = (
        self.reinforcement,
        current_state,
        chosen,
    )

    # Update strength using the reinforcement gathered this cycle.
    self.strengthen(self.reinforcement)