UserPreferences

GovernorBrain


  1 
  2 
  3 
  4 
  5 
  6 
  7 
  8 
  9 
 10 
 11 
 12 
 13 
 14 
 15 
 16 
 17 
 18 
 19 
 20 
 21 
 22 
 23 
 24 
 25 
 26 
 27 
 28 
 29 
 30 
 31 
 32 
 33 
 34 
 35 
 36 
 37 
 38 
 39 
 40 
 41 
 42 
 43 
 44 
 45 
 46 
 47 
 48 
 49 
 50 
 51 
 52 
 53 
 54 
 55 
 56 
 57 
 58 
 59 
 60 
 61 
 62 
 63 
 64 
 65 
 66 
 67 
 68 
 69 
 70 
 71 
 72 
 73 
 74 
 75 
 76 
 77 
 78 
 79 
 80 
 81 
 82 
 83 
 84 
 85 
 86 
 87 
 88 
 89 
 90 
 91 
 92 
 93 
 94 
 95 
 96 
 97 
 98 
 99 
100 
101 
102 
103 
104 
105 
106 
107 
108 
109 
110 
111 
112 
113 
114 
115 
116 
117 
118 
119 
120 
121 
122 
123 
124 
125 
126 
127 
128 
129 
130 
131 
132 
133 
134 
135 
136 
137 
138 
139 
140 
141 
142 
143 
144 
145 
146 
147 
148 
149 
150 
151 
152 
153 
154 
155 
156 
157 
158 
159 
160 
161 
162 
163 
164 
165 
166 
167 
168 
169 
170 
171 
172 
173 
174 
175 
176 
177 
178 
179 
180 
181 
182 
183 
184 
185 
186 
187 
188 
189 
190 
191 
192 
193 
194 
195 
196 
197 
198 
199 
200 
201 
202 
203 
204 
205 
206 
207 
208 
209 
210 
211 
212 
213 
214 
215 
216 
217 
218 
219 
220 
221 
222 
223 
224 
225 
226 
227 
228 
229 
230 
231 
232 
233 
234 
235 
236 
237 
238 
239 
240 
241 
242 
243 
244 
245 
246 
247 
248 
249 
250 
251 
252 
253 
254 
255 
256 
257 
258 
259 
260 
261 
262 
263 
264 
265 
266 
267 
268 
269 
270 
271 
272 
273 
274 
275 
276 
277 
278 
279 
280 
281 
282 
283 
284 
285 
286 
287 
288 
289 
290 
291 
292 
293 
294 
295 
296 
297 
298 
299 
300 
301 
302 
303 
304 
# imported modules
from pyrobot.brain import Brain
from pyrobot.brain.VisConx.VisRobotConx import *
import pyrobot.brain.ravq
import os
import time
import random


# Experiment locations. Log files are written to
# rootDirectory + currentExperiment; currentBrain is the brain source file
# that gets copied into that directory as a record of the run.
rootDirectory = '/local/'
currentExperiment = 'data/'
currentBrain = '/local/GovernorBrain.py'

class GovernorBrain(Brain):
    """A brain that uses a RAVQ to govern network learning.

    On every step a hand-coded controller (``avoidObstacles``) chooses the
    motor command.  The scaled range readings together with the scaled
    command are logged and fed to the RAVQ, and the network is trained on
    a pattern selected through the RAVQ (see ``step``).  All output files
    are written to ``rootDirectory + currentExperiment``.
    """

    def setup(self):
        """Initialise robot parameters, network, RAVQ and log files.

        Raises:
            RuntimeError: if ``exp.lock`` already exists in the experiment
                directory, so data of an earlier run is not overwritten.
        """

        # for use with player/stage
        #self.startService('truth')
        #self.startService('bumper')

        # robot parameters
        self.robot.range.units = 'ROBOTS'
        self.maxvalue = self.robot.range.getMaxvalue()
        # NOTE(review): presumably a margin so that scaled readings stay
        # below 1 - the reason for 0.075 is not documented; confirm.
        self.maxvalue += 0.075

        # status variables
        self.verbosity = 1
        self.direction = 1       # sign of the turn used while blocked
        self.blockedFront = 0    # 1 while an obstacle in front is handled
        self.wasStalled = 0      # number of steps seen stalled since last kick
        self.counter = 0         # number of completed steps
        self.previous = [0.0, 0.0]

        # tweakable params
        self.sleepTime = 0.10
        self.stopTime = 10000

        # choose the governor method
        # (true: buffer kept in this class, false: history kept by the ravq)
        self.method = 0

        # create network
        self.net = VisRobotNetwork() # could use VisRobotSRN()
        self.inSize = self.robot.range.count
        # '//' keeps the layer size an integer on every Python version
        self.net.addLayers(self.inSize, self.inSize // 2, 2)

        # defaults - but here explicit
        self.net.setBatch(0)
        self.net.setInteractive(0)
        self.net.setVerbosity(0)

        # initialize network
        self.net.initialize()

        # learning parameters
        self.net.setEpsilon(0.2)
        self.net.setMomentum(0.9)
        self.net.setTolerance(0.05)

        # set learning
        self.net.setLearning(1)

        # input ravq (tweakable parameters)
        # The module is imported as pyrobot.brain.ravq; the old name
        # 'pyro' is not bound anywhere in this file.
        self.ravq = pyrobot.brain.ravq.ExperimentalRAVQ(5, .3, .2, .02)
        self.ravq.setHistory(1)
        self.ravq.setAddModels(1)
        self.ravq.setLearning(1)
        # one mask entry per sensor plus two entries for the motor values
        self.ravq.setMask([1] * self.inSize + [self.inSize // 2] * 2)

        # buffer for governor
        self.buffer = []
        self.bufferSize = 100
        self.bufferIndex = 0

        # file IO
        self.path = rootDirectory + currentExperiment
        lockPath = self.path + "exp.lock"
        if os.path.isfile(lockPath):
            # string exceptions are not supported by current interpreters
            raise RuntimeError("Lock error!")
        try:
            os.mkdir(self.path)
        except OSError:
            # Usually the directory exists already.  If it really cannot be
            # created, opening the lock file below reports the problem.
            pass
        lock = open(lockPath, "w")
        try:
            lock.write("This file locks the experiment directory to " +
                       "prevent overwriting experimental data.")
        finally:
            lock.close()
        # archive brain for future reference
        os.system("cp " + currentBrain + " " + self.path + "archive.py")
        self.netInfo = open(self.path + 'nn.dat', 'w')
        self.ravq.openLog(self.path + 'ravq.log')
        self.ravqInfo = open(self.path + 'ravq.dat', 'w')
        self.repositionLog = open(self.path + 'reposition.dat', 'w')
        self.data = open(self.path + 'input_target.dat', 'w')
        self.balancedData = open(self.path + 'balanced.dat', 'w')

    def destroy(self):
        """Close every log file opened in ``setup`` and destroy the network."""
        self.netInfo.close()
        self.ravq.closeLog()
        self.ravqInfo.close()
        self.repositionLog.close()
        self.data.close()
        self.balancedData.close()
        self.net.destroy()

    def saveListToFile(self, ls, file):
        """Write the items of ``ls`` as one line to the open file ``file``.

        Every item is converted with ``str`` and followed by one space;
        the line is terminated with a newline.
        """
        for item in ls:
            file.write(str(item) + " ")
        file.write("\n")

    def scaleSensors(self, val):
        """From Robots (or anything) to [0, 1]: divide by ``self.maxvalue``."""
        return (val / self.maxvalue)

    def scaleMotors(self, val):
        """[-1, 1] to [0, 1]"""
        return (val + 1) / 2.0

    def kick(self):
        """How to get unstuck.

        Logs the stall with the current step counter, then tries up to
        four random moves (positive translation, negative translation,
        positive rotation, negative rotation).  Each move is followed by a
        one second sleep and an update; the sequence ends as soon as
        'robot/stall' is no longer set.
        """
        self.repositionLog.write("STALLED " + str(self.counter) + "\n")
        escapes = ((0.5, 0.0), (-0.5, 0.0), (0.0, 0.5), (0.0, -0.5))
        for attempt, (translate, rotate) in enumerate(escapes):
            # the first move is unconditional, later ones only if still stuck
            if attempt > 0 and not self.get('robot/stall'):
                break
            amount = random.random()
            self.move(translate * amount, rotate * amount)
            time.sleep(1)
            self.update()

    # this is not the wall follower!

    def avoidObstacles(self):
        """
        Determines next action, but doesn't execute it.
        Returns the translate and rotate values.

        When front is blocked, it picks to turn away from the
        direction with the minimum reading and maintains that
        turn until front is clear.

        The turn amount is a fresh random number on every call.  Front
        groups are compared against 0.7 and the two side sensors (0 and 7)
        against 0.3.
        """
        d = 0.7
        ds = 0.3
        turn = random.random()
        minFront = min(self.get('robot/range/front/value'))
        minLeft  = min(self.get('robot/range/front-left/value'))
        minRight = min(self.get('robot/range/front-right/value'))
        sideLeft = self.get('robot/range/0/value')
        sideRight = self.get('robot/range/7/value')

        if minFront < d:
            # choose the turn direction only when the blockage is new
            if not self.blockedFront:
                if minRight < minLeft:
                    self.direction = 1
                else:
                    self.direction = -1
            self.blockedFront = 1
            return [0, self.direction * turn]

        if minLeft < d:
            if self.blockedFront:
                return [0, self.direction * turn]
            return [0, -turn]

        if minRight < d:
            if self.blockedFront:
                return [0, self.direction * turn]
            return [0, turn]

        if sideLeft < ds:
            return [0, -turn]
        if sideRight < ds:
            return [0, turn]

        # nothing close on any side: clear the flag and drive straight
        self.blockedFront = 0
        return [.2, 0]

    def wallFollower(self):
        """Alternative controller; returns [translate, rotate].

        Not called by ``step`` in this class.  Turns in place while the
        front is closer than ``frontRange``; otherwise steers depending on
        whether the front-left reading is below ``minRange`` or above
        ``maxRange``, or the front-right reading is below ``minRange``.
        """
        # tweakable parameters
        frontRange = 0.7
        minRange = .5
        maxRange = .7
        amount = 0.1

        # important sensors
        minFront = min(self.get('robot/range/front/value'))
        minLeft  = min(self.get('robot/range/front-left/value'))
        minRight = min(self.get('robot/range/front-right/value'))

        # the decision algorithm
        if minFront < frontRange:
            if not self.blockedFront:
                self.direction = -1
            self.blockedFront = 1
            return [0, self.direction * amount]

        # NOTE(review): the flag is cleared as soon as the front is free,
        # so unlike avoidObstacles the turn is never maintained afterwards.
        # The original code tested the flag again below, but those branches
        # could not be reached; confirm whether this reset is intended.
        self.blockedFront = 0
        if minLeft < minRange:
            return [amount / 2.0, -amount]
        if minLeft > maxRange:
            return [amount / 2.0, amount]
        if minRight < minRange:
            return [amount, amount]
        return [0.1, 0.0]

    def step(self):
        """Run one control cycle.

        Chooses the motor command with ``avoidObstacles``, logs the scaled
        input/target vector, feeds it to the RAVQ, trains the network on
        one stored pattern and finally moves the robot.  Once the counter
        exceeds ``stopTime`` the weights and the RAVQ are saved, the files
        are closed and the brain is asked to stop.
        """

        # display count
        if self.verbosity > 0:
            print(self.counter)
        if self.counter > self.stopTime:
            self.net.saveWeightsToFile(self.path + 'network.wts')
            self.ravq.saveRAVQToFile(self.path + 'ravq.pck')
            self.ravqInfo.write(str(self.ravq))
            self.destroy() # closes files
            self.pleaseStop()
            # The log files are closed now; the rest of the cycle would
            # try to write to them, so stop here.
            return

        # use self.avoidObstacles() to change primitive behavior
        motors = self.avoidObstacles()

        # scale values that the network will use (real lists are needed
        # because the two are concatenated below)
        inputs = [self.scaleSensors(value)
                  for value in self.get('robot/range/all/value')]
        targets = [self.scaleMotors(value) for value in motors]
        pattern = inputs + targets

        # record the data for later offline learning
        self.saveListToFile(pattern, self.data)

        # classify the data using the ravq
        self.ravq.input(pattern)
        # autolabel the ravq models (slow)
        self.ravq.autoLabel('decimal')

        if self.verbosity > 0:
            # single-string form prints the same text as the old
            # comma-separated print statements
            print(" RAVQ Winner:  %s"
                  % (self.ravq.getLabel(self.ravq.winner),))
            print(" Number of Models:  %s" % (len(self.ravq.models),))
            print(" MovingAvgDistance:  %s"
                  % (self.ravq.movingAverageDistance,))
            print(" ModelVectorDistance:  %s"
                  % (self.ravq.modelVectorsDistance,))

        # kick if things get bad
        # (wasStalled counts stalled steps and is only reset by a kick)
        if self.get('robot/stall'):
            self.wasStalled += 1
            if self.wasStalled > 10:
                print('Kicking!')
                self.kick()
                self.wasStalled = 0

        if self.method:
            # this method uses a buffer populated with input target pairs
            # that occur at model vector changes
            if self.ravq.getNewWinner(): # 1 if the winner is new
                if len(self.buffer) >= self.bufferSize:
                    # buffer full: drop the oldest pattern
                    self.buffer = self.buffer[1:] + [pattern]
                else:
                    self.buffer.append(pattern)
            self.ravq.logHistory() # record of RAVQ winners
            if len(self.buffer) > 0: # cycle through current buffer
                array = self.buffer[self.bufferIndex]
                self.bufferIndex = (self.bufferIndex + 1) % len(self.buffer)
                error, correct, total, totalPCorrect = self.net.step(
                    input = array[:self.inSize],
                    output = array[self.inSize:])
                self.netInfo.write(str(self.counter) + "\t" + str(error) + "\n")
                self.saveListToFile(array, self.balancedData)
        else:
            # this method uses buffers associated with individual model
            # vectors. these buffers are implemented in ravq.py
            historyLength = self.ravq.getHistoryLength()
            if historyLength > 0:
                array = self.ravq.getHistory(self.bufferIndex)
                self.bufferIndex = (self.bufferIndex + 1) % self.ravq.getHistoryLength()
                self.net.step(input = array[:self.inSize],
                              output = array[self.inSize:])
                self.saveListToFile(array, self.balancedData)

        # move the robot according to the primitive controller
        self.move(motors[0], motors[1])

        # sleep, record motor values, increment counter
        time.sleep(self.sleepTime)
        # optional additional input of motor values
        self.previous = motors[:]
        self.counter += 1

def INIT(engine):
    """Build the GovernorBrain for ``engine`` and return it."""
    brain = GovernorBrain('GovernorBrain', engine)
    return brain

if __name__ == '__main__':
    # Run as a script: start the simulator front end with a Khepera robot
    # and this file as the brain.
    # NOTE(review): the imports above use the 'pyrobot' package while this
    # command still calls 'pyro' - confirm the launcher name on the target
    # installation.
    launchCommand = "pyro -r Khepera -b /local/GovernorBrain.py"
    os.system(launchCommand)