"""
This program reads data from a CSV file and trains a neural network on it.
"""
from pyrobot.brain.conx import *
def scale(vect):
    """
    Scale the values of ONE column onto evenly spaced codes in [0.0, 1.0].

    The distinct values in vect are sorted with Python's default ordering;
    the smallest is coded 0.0, the largest 1.0, and any values in between
    are spread evenly by rank.  A column holding a single distinct value is
    coded as all 0.0, and an empty column yields an empty list.

    vect    -- sequence of hashable, mutually comparable values
    returns -- list of floats, one per element of vect, in the same order
    """
    ordered = sorted(set(vect))
    if not ordered:
        # An empty column has no values to rank.
        return []
    if len(ordered) == 1:
        # If there is only one thing in the category it can't matter too much.
        mapping = {ordered[0]: 0.0}
    else:
        top = len(ordered) - 1
        # "* 1.0" keeps this a float division on Python 2 as well.
        mapping = dict((value, rank * 1.0 / top)
                       for rank, value in enumerate(ordered))
    return [mapping[v] for v in vect]
def transpose(table):
    """
    Return the transpose of table: rows become columns and columns become rows.

    The width of the result is taken from the first row, so every row must
    have at least as many entries as the first one (a shorter row raises
    IndexError); extra entries in longer rows are ignored.  An empty table
    transposes to an empty list.

    table   -- list of rows, each row an indexable sequence
    returns -- list of lists, one per column of table
    """
    if not table:
        # No rows means there is no first row to take the width from.
        return []
    width = len(table[0])
    return [[row[col] for row in table] for col in range(width)]
def loadFile(filename, targets=None, inputs=None, names=None, lookup=None):
    """
    Read one whitespace-delimited data file and append its rows to the
    running collections.

    The first line of the file is treated as a header and discarded.  Every
    other line is split on whitespace into a NAME field, a target field and
    the remaining input fields.  A line is accepted only if it has the same
    number of fields as the first non-blank data line and its name has not
    been seen before; rejected lines are reported and skipped.  Blank lines
    are ignored.  A line whose input fields repeat those of an earlier line
    produces a warning but is still accepted.

    filename -- path of the file to read
    targets  -- list extended with the target field of each accepted line
    inputs   -- list extended with the list of input fields of each accepted
                line
    names    -- list extended with each accepted name (double quotes removed)
    lookup   -- dict mapping tuple(input fields) to a comma-terminated string
                of the names that share those inputs

    Each collection defaults to a fresh empty container; pass in the values
    returned by a previous call to accumulate several files.

    returns  -- (targets, inputs, names, lookup, count), where count is the
                number of lines accepted from this file
    """
    # Create the containers per call: mutable default arguments would be
    # shared between calls and leak rows from one load into the next.
    if targets is None:
        targets = []
    if inputs is None:
        inputs = []
    if names is None:
        names = []
    if lookup is None:
        lookup = {}
    print("Loading data '%s'..." % filename)
    with open(filename, 'r') as handle:
        # Drop the header line and split every remaining line into fields.
        rows = [raw.split() for raw in handle.readlines()[1:]]
    # The first non-blank data line fixes the expected number of fields.
    lineLength = 0
    for fields in rows:
        if fields:
            lineLength = len(fields)
            break
    seen = set(names)  # constant-time duplicate-name checks
    count = 0
    for index, line in enumerate(rows):
        n = index + 1  # 1-based data line number (the header is not counted)
        if not line:
            # Blank line: there is no name to report, just move on.
            continue
        name = line[0].replace('"', '')  # remove quotes if NAME has them
        if len(line) != lineLength:
            print("ERROR: invalid data; Skipping line #%d: name: %s" % (n, name))
            continue
        if name in seen:
            print("ERROR: duplicate name; Skipping line #%d: name: %s" % (n, name))
            continue
        key = tuple(line[2:])
        if key in lookup:
            # Same inputs as an earlier name: warn, but still keep the row.
            print(" WARNING: duplicate input line #%d: %s same as %s" % (n, name, lookup[key]))
        seen.add(name)
        names.append(name)
        targets.append(line[1])
        inputs.append(line[2:])
        lookup[key] = lookup.get(key, "") + name + ","
        count += 1
    print("Done with '%s'; read %d names..." % (filename, count))
    return targets, inputs, names, lookup, count
def loadData(trainfile, testfile):
    """
    Load trainfile and (unless testfile is "") testfile, then encode the rows.

    Rows from the training file come first, followed by the rows from the
    test file.  The target column and every input column are scaled with
    scale() after both files have been read.

    returns -- (inputs, targets, patterns, traincount, testcount):
               inputs      list of scaled input rows
               targets     list of one-element lists holding the scaled target
               patterns    dict mapping each row name to its scaled input row,
                           plus the fixed entries "female" and "male"
               traincount  number of rows accepted from trainfile
               testcount   number of rows accepted from testfile (0 if none)
    """
    # First read each CSV file; the test rows are appended after the
    # training rows in the same lists.
    targets, inputs, names, lookup, traincount = loadFile(trainfile)
    if testfile == "":
        testcount = 0
    else:
        targets, inputs, names, lookup, testcount = loadFile(
            testfile, targets, inputs, names, lookup)
    # Next, make the scaled codes: targets become one-element rows ...
    targets = [[code] for code in scale(targets)]
    # ... and each input column is scaled on its own, then turned back
    # into rows.
    scaledColumns = [scale(column) for column in transpose(inputs)]
    inputs = transpose(scaledColumns)
    # Next, make the patterns: one entry per row, keyed by the row's name.
    patterns = {}
    for n, row in enumerate(inputs):
        patterns[names[n]] = row
    # Patterns for output:
    patterns["female"] = [0.0]
    patterns["male"] = [1.0]
    print("Done loading all data!")
    maleCount = sum(1 for row in targets if row[0] == 1)
    femaleCount = sum(1 for row in targets if row[0] == 0)
    print("Males: %d Females: %d" % (maleCount, femaleCount))
    return (inputs, targets, patterns, traincount, testcount)
##############################################################################
# Here is where you will make all of your changes
##############################################################################
# The names of the files (use "" if no testfile):
trainfile = "allnames.csv"
testfile = "testnames.csv"
# Load the data.  Rows from the training file come first, followed by the
# rows from the test file, so traincount marks where to split them.
inputs, targets, patterns, traincount, testcount = loadData(trainfile, testfile)
inputs, testInputs = inputs[:traincount], inputs[traincount:]
targets, testTargets = targets[:traincount], targets[traincount:]
##############################################################################
# The rest is the network code:
print("Building network...")
net = Network()
net.setAutoCrossValidation(1)  # turns auto cross validation on
# Input, hidden, and output layer sizes: one input unit per input column,
# 16 units in the hidden layer, and one output unit per target value.
net.addLayers(len(inputs[0]), 16, len(targets[0]))
net.setInputs(inputs)
net.setTargets(targets)
# Parameters:
net.tolerance = 0.4  # within this amount to be considered correct
net.epsilon = 0.1  # learning rate
net.momentum = 0.9  # momentum
net.reportRate = 1  # how often to report
net.stopPercent = 0.85  # percentage to get correct
net.useCrossValidationToStop = 1  # 1 = yes, use CV to stop; 0 = use normal TSS
##############################################################################
print("Training...")
# NOTE(review): the argument presumably limits the number of training
# sweeps -- confirm against the conx Network.train documentation.
net.train(10)
##############################################################################
# All Done!, let's test with learning switched off:
net.learning = 0
net.interactive = 1
net.setPatterns(patterns)
net.sweep()
# Now, let's test the test data:
net.interactive = 1
net.setInputs(testInputs)
net.setTargets(testTargets)
net.sweep()