from random import seed
from random import randrange
from csv import reader

# NOTE(review): this listing was recovered from a copy in which line breaks,
# indentation and every pair of square brackets had been stripped. Subscripts
# such as row[-1], row[index] and node['left'] were restored from context.


# Step 1 Calculating a impurity Index
# Calculate the impurity index for a split csv file
def impurity_measure(groups, classes):
    """Return the size-weighted Gini impurity of a candidate split.

    Args:
        groups: Iterable of groups (lists of rows). The class label of a row
            is read from its last element.
        classes: The class labels to score each group against.

    Returns:
        A float; 0.0 means every non-empty group holds a single class.
        Empty groups contribute nothing to the total.
    """
    # count all samples at split point
    n_instances = float(sum(len(group) for group in groups))
    # sum weighted impurity index for each group
    impurity = 0.0
    for group in groups:
        size = float(len(group))
        # avoid divide by zero
        if size == 0:
            continue
        # Collect the labels once per group instead of once per class.
        labels = [row[-1] for row in group]
        score = 0.0
        # score the group based on the score for each class
        for class_val in classes:
            p = labels.count(class_val) / size
            score += p * p
        # weight the group score by its relative size
        impurity += (1.0 - score) * (size / n_instances)
    return impurity


# Step 2 Create Split
# We next need to split the csv file into two.(Attribute and Value)
# This is done in two stages
# First we split the data based on attribute and an associated value
def split_data(index, value, csv_file):
    """Partition rows on one attribute.

    Args:
        index: Position of the attribute within each row.
        value: Threshold the attribute is compared against.
        csv_file: Iterable of rows.

    Returns:
        A ``(left, right)`` tuple of lists: rows whose attribute is strictly
        less than ``value`` go left, all other rows go right.
    """
    left, right = [], []
    for row in csv_file:
        if row[index] < value:
            left.append(row)
        else:
            right.append(row)
    return left, right


# We then evaluate the split of the csv file to find the best one
def evaluate_split(csv_file):
    """Find the split with the lowest impurity by exhaustive search.

    Every value of every attribute column (all columns except the last, which
    holds the class label) is tried as a threshold.

    Args:
        csv_file: Non-empty list of rows; the column count is taken from the
            first row.

    Returns:
        A dict with keys ``'index'``, ``'value'`` and ``'groups'`` describing
        the best split found. On ties the first candidate tried is kept.
    """
    class_values = list(set(row[-1] for row in csv_file))
    # Sentinel "worse than anything" starting values; impurity never exceeds 1.
    b_index, b_value, b_score, b_groups = 999, 999, 999, None
    for index in range(len(csv_file[0]) - 1):
        for row in csv_file:
            groups = split_data(index, row[index], csv_file)
            impurity = impurity_measure(groups, class_values)
            if impurity < b_score:
                b_index, b_value, b_score, b_groups = (
                    index, row[index], impurity, groups)
    return {'index': b_index, 'value': b_value, 'groups': b_groups}


# Step 3 Build a Tree
# Step 3.1 Terminal Nodes
# Create a terminal node value
def to_terminal(group):
    """Return the most common class label (last element) among ``group``."""
    outcomes = [row[-1] for row in group]
    return max(set(outcomes), key=outcomes.count)


# Step 3.2 Recursive Splitting
# Create child splits for a node or make terminal
def split(node, tree_depth, min_rows_per_node, node_depth):
    """Grow the tree below ``node`` in place.

    Args:
        node: Dict produced by ``evaluate_split``. Its ``'groups'`` entry is
            consumed and replaced by ``'left'`` and ``'right'`` children, each
            either a nested node dict or a terminal class label.
        tree_depth: Depth at which both children are forced to be terminal.
        min_rows_per_node: A child with this many rows or fewer becomes
            terminal.
        node_depth: Depth of ``node`` itself.
    """
    left, right = node['groups']
    del node['groups']
    # One side is empty: nothing was separated, so both children share a label.
    if not left or not right:
        node['left'] = node['right'] = to_terminal(left + right)
        return
    # Depth limit reached: stop splitting.
    if node_depth >= tree_depth:
        node['left'], node['right'] = to_terminal(left), to_terminal(right)
        return
    if len(left) <= min_rows_per_node:
        node['left'] = to_terminal(left)
    else:
        node['left'] = evaluate_split(left)
        split(node['left'], tree_depth, min_rows_per_node, node_depth + 1)
    if len(right) <= min_rows_per_node:
        node['right'] = to_terminal(right)
    else:
        node['right'] = evaluate_split(right)
        split(node['right'], tree_depth, min_rows_per_node, node_depth + 1)


# Step 3.3 Building a Tree
def build_cart(train, tree_depth, min_rows_per_node):
    """Build a decision tree from ``train`` and return its root node dict."""
    root = evaluate_split(train)
    split(root, tree_depth, min_rows_per_node, 1)
    return root


# Step 3.4 Print Decision Tree
def printout_tree(node, node_depth=0):
    """Print the tree rooted at ``node``, one node per line.

    Internal nodes are shown as ``X<column> < <threshold>`` with a 1-based
    column number; terminal nodes are shown as their class label.
    """
    # The recovered source multiplied an empty string here, which can never
    # produce indentation; one space per level is used instead.
    indent = node_depth * ' '
    if isinstance(node, dict):
        print('%sX%d < %.3f' % (indent, node['index'] + 1, node['value']))
        # Recurse through this function's own name; the recovered source
        # called an undefined ``print_tree``.
        printout_tree(node['left'], node_depth + 1)
        printout_tree(node['right'], node_depth + 1)
    else:
        print(' %s%s' % (indent, node))


# Step 4 Make a Prediction
# Make a prediction with a decision tree
# NOTE(review): the source breaks off partway through `predict(node, row)`,
# right after the start of its first condition (`if row[node['index']] ...`).
# The remainder is not available here, so the function is intentionally not
# reconstructed; restore it from the original listing.


I'm Katy!

Would you like to get a custom essay? How about receiving a customized one?

Check it out