用graphviz绘制决策树时出现问题



我为这个问题折腾了一整天,快要抓狂了。我导师发布的代码似乎也不起作用……真是令人沮丧。下面是我目前的代码。我已经束手无策了。我能够构建混淆矩阵并得到模型的准确率,但始终无法成功绘制出决策树。关于问题的细节,请参阅代码末尾的那段注释。任何帮助我都将感激不尽。提前谢谢!

# -*- coding: utf-8 -*-
"""
Spyder Editor
"""
import pandas
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import sklearn.metrics
import os
from sklearn import tree
from io import StringIO
from IPython.display import Image
import pydotplus
import graphviz
# Move up one directory; the relative CSV path below is resolved from there.
os.chdir("..")

# Set Display options: do not truncate columns or rows when frames are shown
pandas.set_option('display.max_columns', None)
pandas.set_option('display.max_rows', None)

# Read in the Mars Crater Data
print("reading data set...")
data = pandas.read_csv('marscrater2_pds.csv', low_memory=False)

print("cleaning data...")
# Take an explicit copy of the NaN-free rows so the column assignments below
# modify an independent frame rather than a view of `data` (this is what
# triggers pandas' SettingWithCopyWarning otherwise).
data_clean = data.dropna().copy()

# Coerce the measurement columns to the smallest float dtype that fits.
for column in ('DIAM_CIRCLE_IMAGE', 'DEPTH_RIMFLOOR_TOPOG',
               'LATITUDE_CIRCLE_IMAGE', 'LONGITUDE_CIRCLE_IMAGE'):
    data_clean[column] = pandas.to_numeric(data_clean[column], downcast="float")
data_clean['NUMBER_LAYERS'] = pandas.to_numeric(data_clean['NUMBER_LAYERS'], downcast="integer")

# Bare expressions: these only echo a result in an interactive console and
# have no effect when the file is run as a plain script.
data_clean.dtypes
data_clean.describe()

# Feature columns used to train the classifier.
predictors = data_clean[['LATITUDE_CIRCLE_IMAGE', 'LONGITUDE_CIRCLE_IMAGE',
                         'DIAM_CIRCLE_IMAGE', 'NUMBER_LAYERS']]
print("converting target into a binary categorical variable...")
def OLD(row):
    """Return 1 if the row's crater depth exceeds 0.25, otherwise 0.

    Args:
        row: Any mapping-like object (e.g. a DataFrame row) that can be
            indexed with the key ``'DEPTH_RIMFLOOR_TOPOG'``.

    Returns:
        int: 1 when ``row['DEPTH_RIMFLOOR_TOPOG'] > 0.25``, else 0. A value
        that does not compare greater than 0.25 (including NaN) yields 0.
    """
    return 1 if row['DEPTH_RIMFLOOR_TOPOG'] > 0.25 else 0

# Apply that change: label every row with OLD (1 or 0) to build the target.
# OLD already takes a row, so it is passed directly instead of via a lambda.
data_clean['IS_DEEP'] = data_clean.apply(OLD, axis=1)
targets = data_clean["IS_DEEP"]

# Hold out 40% of the rows for testing.
pred_train, pred_test, tar_train, tar_test = train_test_split(predictors, targets, test_size=0.4)

# Bare expressions: only echo the split sizes in an interactive console.
pred_train.shape
pred_test.shape
tar_train.shape
tar_test.shape

# Fit the decision tree on the training split and predict the held-out rows.
classifier = DecisionTreeClassifier()
classifier = classifier.fit(pred_train, tar_train)
predictions = classifier.predict(pred_test)

# The "\n" escapes restore the blank lines between sections; the backslashes
# had been lost, leaving a literal "n" in the printed text.
print("building model...\n")
print("Confusion Matrix")
print(sklearn.metrics.confusion_matrix(tar_test, predictions))
print("\nAccuracy Score")
print(sklearn.metrics.accuracy_score(tar_test, predictions))
# Displaying the tree
# This is where I am stuck.
# Can someone please help me render the decision tree?
# This is the code my instructor used in her example and it doesn't work.
# I've seen many variants but none seem to work.  Please bear in mind I am
# a student.  This is all new to me and I am new to Stack Overflow (for those
# of you that take offense at the way I asked my question).

# In-memory text buffer that receives the DOT description of the tree.
out = StringIO()
tree.export_graphviz(classifier, out_file=out)
# Parse the DOT text back into a pydotplus graph object.
graph=pydotplus.graph_from_dot_data(out.getvalue())
# This is the call that fails with the InvocationException shown in the
# console output ('Format: "png" not recognized').
Image(graph.create_png())

这是我从控制台看到的输出:

Confusion Matrix
[[132697   5324]
[  5168  10549]]
Accuracy Score
0.931754023078224
Traceback (most recent call last):
  File "C:\Users\<username>\OneDrive\Documents\IT Nano - Data Analyst\Machine Learning\ML (Week 1).py", line 79, in <module>
    Image(graph.create_png())
  File "C:\ProgramData\Anaconda3\lib\site-packages\pydotplus\graphviz.py", line 1797, in <lambda>
    lambda f=frmt, prog=self.prog: self.create(format=f, prog=prog)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pydotplus\graphviz.py", line 2030, in create
    raise InvocationException(
InvocationException: Program terminated with status: 1. stderr follows: Format: "png" not recognized. Use one of:

我自己解决了这个问题。。。

# Displaying the tree
# Write the DOT description of the fitted tree to disk.
tree.export_graphviz(classifier, out_file='output.dot')
# With out_file=None, export_graphviz returns the DOT source as a string.
# It must be called through the imported `tree` module; the bare name
# `export_graphviz` is never imported and would raise NameError.
dot_data = tree.export_graphviz(classifier, out_file=None)
graph = pydotplus.graph_from_dot_data(dot_data)
# Render the graph to a PNG file instead of displaying it inline.
graph.write_png("output.png")

最新更新