from pyrobot.brain import Brain
from time import sleep
from random import *
from pyrobot.brain.td import *
class SimpleBrain(Brain):
random_percent = 20
reset = 0
# reinforcement value for temporal differencing
REINFORCEMENT_VALUE = .2
# just calls find_successor
def find_path( self ):
loc = self.robot.ask('location')
# make sure we got a tuple
if type(loc) != type( () ):
return "";
direction = self.find_successor(loc)
return direction
# finds the next step, by random, utility-based, or direct moves
def find_successor( self, loc ):
valid_state = 0
moves = self.get_valid_moves( loc )
if not moves:
path = self.robot.ask('path')
cx, cy = loc
px, py = path[len(path) - 2]
if px==cx:
if py < cy:
return "up"
else:
return "down"
else:
if px < cx:
return "left"
else:
return "right"
else:
move_type = randrange(0, 100, 1)
if move_type < self.random_percent: # chance of random movement
return moves[ randrange(0,len(moves)) ]
else:
return self.util_successor( loc, moves )
# returns the next location, based on utility values
def util_successor( self, loc, valid_moves ):
# best move will be the move with the highest utility value
best_move = "none"
zero_count = 0
max_util = -999
for m in valid_moves:
# get the state
new_state = self.valid_move( m, loc )
# if we get a valid state, check to see if its the best one yet
if new_state != 0:
util = self.get_util( new_state )
if util == 0.0:
zero_count += 1
if util > max_util:
max_util = util
best_move = m
# make sure we aren't all zero
if max_util == 0 and zero_count == len(valid_moves):
return valid_moves[ randrange(0,len(valid_moves)) ]
# make the move
elif best_move != "none":
return best_move
else:
# should NEVER get here, but just in case!
print "NO GOOD SUCCESSOR FOUND"
return valid_moves[ randrange(0,len(valid_moves)) ]
def get_valid_moves( self, loc ):
valid = []
states = ("up", "down", "left", "right")
for s in states:
if self.valid_move( s, loc ):
valid.append( s )
return valid
#checks to make sure that a move is valid
def valid_move ( self, move, loc ):
(locX, locY) = loc
visited = self.robot.ask('visited')
if move == "up":
new_state = (locX , locY - 1)
elif move == "right":
new_state = (locX+1, locY)
elif move == "down":
new_state = (locX , locY + 1)
elif move == "left":
new_state = (locX-1, locY)
# if the new state is onto an obstacle, return 0
if new_state in self.obstacles:
return 0
# if the new state is off the map, return 0
elif min(new_state) < 0 or max(new_state) > 14:
return 0
elif new_state in visited:
return 0
else:
return new_state
# returns the utility value for a given location
def get_util( self, loc ):
#utils = self.robot.ask('util')
(locX,locY) = loc
if locX < 0 or locX > 14 or locY < 0 or locY > 14:
return -99
else:
return self.td.get_utility(locX, locY)
# gets called before first run
def setup(self, **args):
# set reinforcement value
alpha = self.REINFORCEMENT_VALUE
# get td instance
self.td = Temporal_Difference( 15, 15, self.robot.ask('goal'), self.robot.ask('pits'), alpha )
self.obstacles = self.robot.ask("obstacles")
# initialize the number of iterations to zero
self.count = 0
# run once every iteration
def step(self):
if self.reset == 1:
self.robot.tell("reset")
self.robot.tell("start")
self.setup()
self.reset = 0
# if the goal has not been reached, continue to find a path
if self.robot.ask('complete') == 0:
direction = self.find_path()
self.robot.move( direction )
# if the goal has been reached, display the current path
else:
path = self.robot.ask('path')
self.count += 1
# show the path
print "PATH: ", path
# adjust the utility values of the map
print "Computing TDs: #", self.count, " length: ", len(path)
self.td.do_td( path )
# start a new run
self.robot.tell('start')
for e in path:
# sends the robot a formatted string including the x,y coordinates, and a color
self.robot.tell( self.td.get_utility_color(e[0], e[1]));
alpha = self.td.get_alpha()
if( (self.count % 15) == 0 and alpha > 0 ):
self.td.set_alpha( alpha - 0.01 )
self.random_percent -= 5
def INIT(engine):
return SimpleBrain('SimpleBrain', engine)