The basic idea

Q learning

An example: maze

Another example: ALife

Making decisions

Implementation

A sketch of how Q learning will be implemented

def step(self):
    """Run one sense / decide / act / learn cycle.

    Reinforcement for the cycle is accumulated in ``self.reinforcement``:
    it starts at 0 and then receives the sensing reinforcement, the
    per-step cost, and whatever the chosen action returns.  The final
    total is recorded in ``self.last_reinf`` (alongside the state and
    action index of this cycle) and handed to ``self.strengthen``.
    """
    # Every cycle starts with a clean reinforcement tally.
    self.reinforcement = 0

    # Perceive the surroundings and look up the Q-values for that percept.
    current_state = self.sensory_state()
    current_q = self.get_Q(current_state)

    # Sensing and the step itself both contribute to the tally.
    self.reinforcement += self.sense_reinforce()
    self.reinforcement += self.step_cost

    # Choose an action from the Q-values, then carry it out; the action's
    # return value is added to the tally.
    chosen = self.decide(current_q)
    self.reinforcement += self.index2action(chosen)()

    # Q learning is skipped while the world's step counter is still 0
    # (presumably because no previous state/action has been recorded
    # yet -- confirm against learn()).
    if self.world.n_steps != 0:
        self.learn(current_state, current_q)

    # Remember this cycle's outcome, state and action for later use.
    self.last_reinf, self.last_state, self.last_action = (
        self.reinforcement,
        current_state,
        chosen,
    )

    # Apply the total reinforcement to the critter's strength.
    self.strengthen(self.reinforcement)