# Split a dataset based on an attribute and an attribute value
def t_split(index, value, dataset):
    left, right = list(), list()
    for row in dataset:
        if row[index] < value:
            left.append(row)
        else:
            right.append(row)
    return left, right
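# Quick sanity check (illustrative rows, not part of the original post):
# splitting on attribute 0 at the value 3.0 should send the two smaller
# rows left and the larger one right.
_rows = [[1.5, 0], [2.5, 0], [4.0, 1]]
assert t_split(0, 3.0, _rows) == ([[1.5, 0], [2.5, 0]], [[4.0, 1]])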

# Calculate the Gini index for a split dataset
def gini_index(groups, class_values):
    gini = 0.0
    # sum proportion * (1 - proportion) for each class within each group
    for class_value in class_values:
        for group in groups:
            size = len(group)
            if size == 0:
                continue
            proportion = [row[-1] for row in group].count(class_value) / float(size)
            gini += (proportion * (1.0 - proportion))
    return gini
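# Worked example (illustrative groups, not from the original post): a split
# that puts each class in its own group scores 0.0, while a single group with
# a 50/50 class mix scores 0.5 under this (unweighted) formulation.
_pure = [[[1, 0], [2, 0]], [[3, 1], [4, 1]]]
_mixed = [[[1, 0], [2, 1]], []]
assert gini_index(_pure, [0, 1]) == 0.0
assert gini_index(_mixed, [0, 1]) == 0.5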

# Select the best split point for a dataset
def get_split(dataset):
    class_values = list(set(row[-1] for row in dataset))
    b_index, b_value, b_score, b_groups = 999, 999, 999, None
    # try every attribute value in the dataset as a candidate split point
    # and keep the one with the lowest Gini score
    for index in range(len(dataset[0])-1):
        for row in dataset:
            groups = t_split(index, row[index], dataset)
            gini = gini_index(groups, class_values)
            if gini < b_score:
                b_index, b_value, b_score, b_groups = index, row[index], gini, groups
    return {'index':b_index, 'value':b_value, 'groups':b_groups}
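# Example (illustrative rows, not from the original post): with one feature
# that cleanly separates the two classes, the chosen cut point should fall on
# the first value belonging to the higher class.
_tiny = [[1.0, 0], [2.0, 0], [8.0, 1], [9.0, 1]]
_best = get_split(_tiny)
assert (_best['index'], _best['value']) == (0, 8.0)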

# Create a terminal node value
def to_terminal(group):
    outcomes = [row[-1] for row in group]
    return max(set(outcomes), key=outcomes.count)
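# Example (illustrative rows, not from the original post): the terminal value
# is simply the most common class label in the group.
assert to_terminal([[1, 0], [2, 1], [3, 1]]) == 1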

# Create child splits for a node or make terminal
def split(node, max_depth, min_size, depth):
    left, right = node['groups']
    del(node['groups'])
    # check for a no split
    if not left or not right:
        node['left'] = node['right'] = to_terminal(left + right)
        return
    # check for max depth
    if depth >= max_depth:
        node['left'], node['right'] = to_terminal(left), to_terminal(right)
        return
    # process left child
    if len(left) <= min_size:
        node['left'] = to_terminal(left)
    else:
        node['left'] = get_split(left)
        split(node['left'], max_depth, min_size, depth+1)
    # process right child
    if len(right) <= min_size:
        node['right'] = to_terminal(right)
    else:
        node['right'] = get_split(right)
        split(node['right'], max_depth, min_size, depth+1)

# Build a decision tree
def build_tree(train, max_depth, min_size):
    root = get_split(train)
    split(root, max_depth, min_size, 1)
    return root

# Print a decision tree
def print_tree(node, depth=0):
    if isinstance(node, dict):
        print('%s[X%d < %.3f]' % ((depth*' ', (node['index']+1), node['value'])))
        print_tree(node['left'], depth+1)
        print_tree(node['right'], depth+1)
    else:
        print('%s[%s]' % ((depth*' ', node)))

dataset = [[2.771244718,1.784783929,0],
[1.728571309,1.169761413,0],
[3.678319846,2.81281357,0],
[3.961043357,2.61995032,0],
[2.999208922,2.209014212,0],
[7.497545867,3.162953546,1],
[9.00220326,3.339047188,1],
[7.444542326,0.476683375,1],
[10.12493903,3.234550982,1],
[6.642287351,3.319983761,1]]
tree = build_tree(dataset, 1, 1)
print_tree(tree)
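For this data the split on X1 at roughly 6.642 separates the two classes exactly, so with a maximum depth of 1 the printed tree should look something like:

[X1 < 6.642]
 [0]
 [1]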

While some answer "line by line", others note the issue of cognitive dissonance: at some point you step back to what you were actually trying to do and implement that directly in the language you need it to be in.
