# (extraction artifact: "Beruflich Dokumente / Kultur Dokumente" site-navigation text from the document export)
if num in [1,5,9,13]:
plt.ylabel(name_of_list[i]) # show the y label only on the leftmost column
if num in [13,14,15,16]:
plt.xlabel(name_of_list[j]) # 在最下⾯標⽰x label
if j==0:# set the x_axis range per feature
plt.xlim((4,8))
elif j==1:
plt.xlim((1.5,4.5))
elif j==2:
plt.xlim((1,7))
else:
plt.xlim((0,2.5))
num+=1
plt.show()
# compute the mean and standard deviation of each feature; they are needed
# later to build the probability density functions
mean = [sepal_len.mean(),sepal_width.mean(),pedal_len.mean(),pedal_width.mean()]
std = [sepal_len.std(),sepal_width.std(),pedal_len.std(),pedal_width.std()]
#把事前機率先紀錄起來
prior_list = []
for i in range(3):
prior_list.append(len(pedal_len[type_flower ==flower[i]])/len(training_set))
#pedal_len[type_flower == flower[i]] return the data of type_flower == flowe
r[i]
# using len() to calculate the number of each type of flower
print('P(',flower[i],')=',prior_list[i])
est_std ={}
for i in range(3):
est_std['SEPAL_LEN '+ flower[i]]=sepal_len[type_flower == flower[i]].std()
for i in range(3):
est_std['SEPAL_WIDTH '+ flower[i]]=sepal_width[type_flower ==
flower[i]].std()
for i in range(3):
est_std['PEDAL_LEN '+ flower[i]]=pedal_len[type_flower == flower[i]].std()
for i in range(3):
est_std['PEDAL_WIDTH '+ flower[i]]=pedal_width[type_flower ==
flower[i]].std()
把
# qda 的 discriminate 式⼦包成 function
import math
def qda_discriminate(mean,std_deviation,prior,x):
return -math.log(std_deviation)-(x-mean)**2/(2*std_deviation**2) +
math.log(prior)
# qda_classifier
def QDA_sepal_len_classifier(x):
g = []# 產⽣空⽩的list g
for i in range(3):
g.append(qda_discriminate(est_m['SEPAL_LEN '+flower[i]],est_std['SEPAL_L
EN '+flower[i]],prior_list[i],x) )
將 與
# est_m est_std 依照SEPAL_LEN 與 花的種類 產⽣對應的 輸⼊ discriminate fu
nction 產⽣ score
#並且紀錄在 裡⾯ g
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def QDA_pedal_width_classifier(x):
g = []
for i in range(3):
g.append(qda_discriminate(est_m['PEDAL_WIDTH '+flower[i]],est_std['PEDAL
_WIDTH '+\
flower[i]],prior_lis
t[i],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def QDA_pedal_len_classifier(x):
g = []
for i in range(3):
g.append(qda_discriminate(est_m['PEDAL_LEN '+flower[i]],est_std['PEDAL_L
EN '+flower[i]],prior_list[i],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
# Because LDA assumes the same variance in every class, we compute the
# prior-weighted average of the class variances as the estimated (shared)
# parameter; the weights are the prior probabilities.
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def LDA_sepal_width_classifier(x):
g = []
for i in range(3):
g.append(lda_discriminate(est_m['SEPAL_WIDTH '+flower[i]],sepal_width_we
ighted_var,prior_list[i],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def LDA_pedal_len_classifier(x):
g = []
for i in range(3):
g.append(lda_discriminate(est_m['PEDAL_LEN '+flower[i]],pedal_len_weight
ed_var,prior_list[i],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def LDA_pedal_width_classifier(x):
def LDA_pedal_width_classifier(x):
g = []
for i in range(3):
g.append(lda_discriminate(est_m['PEDAL_WIDTH '+flower[i]],pedal_width_we
ighted_var,prior_list[i],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def nmc_sepal_len_classifier(x):
g = []
for i in range(3):
g.append(nmc_discriminate(est_m['SEPAL_LEN '+flower[i]],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def nmc_sepal_width_classifier(x):
g = []
for i in range(3):
g.append(nmc_discriminate(est_m['SEPAL_WIDTH '+flower[i]],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
def nmc_pedal_len_classifier(x):
g = []
for i in range(3):
g.append(nmc_discriminate(est_m['PEDAL_LEN '+flower[i]],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
max_g = g[i]
return flower[max_i]
def nmc_pedal_width_classifier(x):
g = []
for i in range(3):
g.append(nmc_discriminate(est_m['PEDAL_WIDTH '+flower[i]],x) )
max_g = g[0]
max_i = 0
for i in range(3):
if max_g < g[i]:
max_i = i
max_g = g[i]
return flower[max_i]
nmc_classifier = {'sepal_len':nmc_sepal_len_classifier,'sepal_width':nmc_sepal_w
idth_classifier,\
'pedal_len':nmc_pedal_len_classifier,'pedal_width':nmc_pedal_w
idth_classifier}
test_set = rawdat[rawdat['TRAIN'] == 0 ]
test_sepal_len = pd.Series.as_matrix(test_set['SEPALLEN Length of Sepals'])
test_sepal_width = pd.Series.as_matrix(test_set['SEPALWID Width of Sepals'])
test_pedal_len = pd.Series.as_matrix(test_set['PETALLEN Length of Petals'])
test_pedal_width = pd.Series.as_matrix(test_set['PETALWID Width of Petals'])
test_type_flower = pd.Series.as_matrix(test_set['IRISTYPE Three types of iris'])
test={'sepal_len':test_sepal_len,'sepal_width':test_sepal_width,'pedal_len':test
_pedal_len,\
'pedal_width':test_pedal_width}
## Confusion Matrix
### Create the confusion matrix function, it will be used for all the Classifier
for i in range(len(testdata)):
predict = classifier(testdata[i])
col = flower.index(sol[i]) # confusion-matrix columns correspond to the observed class
row = flower.index(predict) # row 對應的是 predict
if col == row :
confusion_m[row,col]+= 1
else:
confusion_m[row,col]+= 1
# Because the different classifiers were packed into one dictionary earlier,
# this function can generate a confusion matrix for each classifier.
def Analysis_Matrix(Classifier,test,test_type_flower):
Dict= {}
for i in range(4):
name = name_of_list[i]
Dict[name] = confusion_matrix(Classifier[name],test[name],test_type_flow
er,flower)
# 回傳的confusion matrix 以 feature data 最為 值去
key access
return Dict
QDA_matrix = Analysis_Matrix(qda_classifier,test,test_type_flower)
LDA_matrix = Analysis_Matrix(lda_classifier,test,test_type_flower)
NMC_matrix = Analysis_Matrix(nmc_classifier,test,test_type_flower)
Confusion_Matrix = [QDA_matrix,LDA_matrix,NMC_matrix]
#包成⼀個⼤的 Confusion_Matrix 只是⽅便我們做輸出
print('----------------Confusion Matrix-----------------')
Confusion_Matrix_name = ['QDA_matrix','LDA_matrix','NMC_matrix']
Average_Accuracy ={}
All_Accuracy = {}
for i in range(len(Confusion_Matrix)):
a = Confusion_Matrix_name[i]
print(a)# 先把Confusion_Matrix 的Analysis Method 印出來
sum_of_accuracy = 0
for b in Confusion_Matrix[i]:
print(b)# 輸出
Confusion_Matrix 的 值,也就是
key feature name
print(Confusion_Matrix[i][b])# 輸出
confusion matrix
diagonal_sum = np.ndarray.diagonal(Confusion_Matrix[i][b]).sum()# trace
total_test = Confusion_Matrix[i][b][-1][-1]# access 右下⾓的 total test si
ze
accuracy = (diagonal_sum-total_test)/total_test #產⽣ accuracy
print('Accuracy: ', accuracy )
All_Accuracy[a+b] = accuracy
sum_of_accuracy += accuracy# 累加 accuracy ⽅便之後算 average accuracy
print('Average Accuracy',sum_of_accuracy/len(Confusion_Matrix[i]))
Average_Accuracy[a] = sum_of_accuracy/len(Confusion_Matrix[i])
把
# average accuracy 存到
Average_Accuracy 這個
dictionary type 之後可以直接⽤ fea
ture name 為
key 去
access
print('------------------------------------')
print('\n\n--------------------Average Accuracy--------------------------')
print(Average_Accuracy)
# Rank:
## 1. QDA & NMC
## 2. LDA
--------------------Average Accuracy--------------------------
{'NMC_matrix': 0.79500000000000004, 'LDA_matrix': 0.789999999999999
92, 'QDA_matrix': 0.79500000000000004}
輸出依照
----------------------- analysis method與 feature 各⾃不同的
accuracy------------------------
QDA_matrix sepal_len 0.72
LDA_matrix sepal_len 0.7
NMC_matrix sepal_len 0.72
QDA_matrix sepal_width 0.54
LDA_matrix sepal_width 0.54
NMC_matrix sepal_width 0.54
QDA_matrix pedal_len 0.94
LDA_matrix pedal_len 0.94
NMC_matrix pedal_len 0.94
QDA_matrix pedal_width 0.98
LDA_matrix pedal_width 0.98
NMC_matrix pedal_width 0.98
******************************** -1/(2*weighted varince) **********
************************
sepal_len -1.87688852277
sepal_width -4.32780649309
pedal_len -2.66146400711
pedal_width -10.9069131084
In [ ]: