# Analyze DNN performance with human experts # Copyright (c) 2017, zhangys@zjgsu.edu.cn # requires NVidia Digits to be installed DICT = {} DICT["DNN_vs_Human"] = "20170719-171742-5c31" DICT["VGG-16_transfer_learning"] = "20170719-171742-5c31" DICT["VGG-16_from_scratch"] = '20170722-143332-f077' DICT["GoogLeNet_from_scratch"] = '20170722-113606-c59c' DICT["GoogLeNet_transfer_learning"] = '20170722-190454-1af1' # '20170719-185942-bf65' DICT["AlexNet_transfer_learning"] = '20170803-121318-b153'#'20170802-164043-96f9'# '20170722-205930-3ac4' # '20170720-094939-b665' DICT["AlexNet_from_scratch"]= '20170802-163356-7f3d'#'20170802-162531-3531' #'20170722-152531-da8d' def GetDictKeys(): return DICT.keys() FILE_LIST="/home/zys/Desktop/test2/data/FileList.txt" LABEL_LIST="/home/zys/Desktop/test2/data/LabelList.txt" JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs" # "/var/lib/digits/jobs/" import os import sys caffe_root = os.environ['CAFFE_ROOT'] # '/home/zys/nv-caffe-0.15' # # When use sudo, the environ var may be different than the current terminal sys.path.insert(0,caffe_root+'python') import numpy import pandas from sklearn.metrics import roc_curve, auc, precision_recall_curve from pandas_ml import ConfusionMatrix import matplotlib.pyplot as plt import shutil import inspect # Add path for DIGITS package sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) import digits.config from digits.inference.errors import InferenceError from digits.job import Job from digits import utils # To solve error "Check failed: error == cudaSuccess (10 vs. 0) invalid device ordinal" import caffe print (caffe.__file__) caffe.set_device(0) """ Perform inference on a list of images using the specified model """ def nv_digits_infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu): """ Perform inference on a list of images using the specified model """ # job directory defaults to that defined in DIGITS config if jobs_dir == 'none': jobs_dir = digits.config.config_value('jobs_dir') # load model job model_dir = os.path.join(jobs_dir, model_id) assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir model = Job.load(model_dir) # load dataset job dataset_dir = os.path.join(jobs_dir, model.dataset_id) assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir dataset = Job.load(dataset_dir) for task in model.tasks: task.dataset = dataset # retrieve snapshot file task = model.train_task() snapshot_filename = None epoch = float(epoch) if epoch == -1 and len(task.snapshots): # use last epoch epoch = task.snapshots[-1][1] snapshot_filename = task.snapshots[-1][0] else: for f, e in task.snapshots: if e == epoch: snapshot_filename = f break if not snapshot_filename: raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch)) # retrieve image dimensions and resize mode image_dims = dataset.get_feature_dims() height = image_dims[0] width = image_dims[1] channels = image_dims[2] resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash' n_input_samples = 0 # number of samples we were able to load input_ids = [] # indices of samples within file list input_data = [] # sample data # load paths from file paths = None with open(input_list) as infile: paths = infile.readlines() # load and resize images for idx, path in enumerate(paths): path = path.strip() try: image = utils.image.load_image(path.strip()) image = utils.image.resize_image( image, height, width, channels=channels, resize_mode=resize_mode) input_ids.append(idx) input_data.append(image) n_input_samples = n_input_samples + 1 except utils.errors.LoadImageError as e: print e # perform inference if layers != 'none': raise InferenceError("Layer visualization is not supported for multiple inference") outputs = model.train_task().infer_many( input_data, snapshot_epoch=epoch, gpu=gpu, resize=True) return outputs["softmax"] def nv_digits_infer_test_model(model, human_results_dir): print ('####### ' + model + ' #########') labels = [] with open(LABEL_LIST) as f: for line in f: if('0' in line): labels.append(True) elif('1' in line): labels.append(False) else: print "Unrecognized label!" DIGITS_JOB_ID = DICT[model] softmax = nv_digits_infer(FILE_LIST, "/home/zys/data/C3R/test/tmp/", JOBS_DIR, DIGITS_JOB_ID, -1, 1, 'none', 0) lines=[] probs = [] for idx, p in enumerate(softmax): probs.append(p[0]) lines.append(str(p[0])+" "+str(p[1])+" "+str(p[2])) with open(model+'_softmax_probs.txt', 'w') as the_file: for line in lines: the_file.write(line+'\n') files=[] with open(FILE_LIST) as f: for line in f: line = line.strip() files.append(line) dic = {} i=0 for f in files: dic[os.path.basename(f)] = labels[i] i+=1 humanTitles = [] humanTPR = [] humanFPR = [] humanPrecision=[] humanRecall = [] for fn in os.listdir(human_results_dir): ks=[] ls=[] print "\n\n******", fn, "******" humanTitles.append(fn) with open(os.path.join(human_results_dir, fn)) as f: for line in f: line = line.strip() k = line.split(',')[0] l = line.split(',')[1] if(k not in ks): ks.append(k) if(l=='1'): ls.append(True) else: ls.append(False) rs=[] for k in ks: rs.append(dic[k]) cm = ConfusionMatrix(rs, ls) # print cm cm.print_stats() s= cm.stats() humanTPR.append(s['TPR']) humanFPR.append(s['FPR']) # 1- TNR humanPrecision.append(s['PPV']) humanRecall.append(s['TPR']) nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall) def nv_digits_infer_test_model3(fname1, fname2, model, human_results_dir): probs=[] with open(fname1) as f: for line in f: fields = line.split() if(len(fields) == 8): ind = fields.index("Disease") v = float(fields[ind+1].strip('%'))/100 probs.append(v) labels = [] with open(fname2) as f: for line in f: if('0' in line): labels.append(True) elif('1' in line): labels.append(False) else: print "Unrecognized label!" files=[] with open(FILE_LIST) as f: for line in f: line = line.strip() files.append(line) dic = {} i=0 for f in files: dic[os.path.basename(f)] = labels[i] i+=1 orig_stdout = sys.stdout log = open(model + '_humans.txt', 'w') sys.stdout = log humanTitles = [] humanTPR = [] humanFPR = [] humanPrecision=[] humanRecall = [] humanTNR=[] humanF1=[] humanMCC=[] humanInformedness=[] humanPPV=[] for fn in os.listdir(human_results_dir): ks=[] ls=[] rs=[] log.write( "\n\n******" + fn + "******\n") log.write( "\n---- Misclassified Files (filename, truth, label)---\n") humanTitles.append(fn) with open(os.path.join(human_results_dir, fn)) as f: for line in f: line = line.strip() k = line.split(',')[0] l = line.split(',')[1] if(k not in ks): ks.append(k) b=(l=='1') ls.append(b) rs.append(dic[k]) if(b!=dic[k]): log.write(k + '\t' + str( dic[k]) + '\t' + str(b) + '\n') cm = ConfusionMatrix(rs, ls) # print cm cm.print_stats() s= cm.stats() humanTPR.append(s['TPR']) humanFPR.append(s['FPR']) # 1- TNR humanPrecision.append(s['PPV']) humanRecall.append(s['TPR']) humanTNR.append(s['TNR']) humanF1.append(s['F1_score']) humanMCC.append(s['MCC']) humanInformedness.append(s['informedness']) humanPPV.append(s['PPV']) log.write( '\n\n ******** Summary *******\n') log.write( "\nExpert\tTPR\t\FPR\tPPV\tTNR\tF1_score\tMCC\tinformednessn\n") i=0 for human in humanTitles: print human, '\t', humanTPR[i], '\t', humanFPR[i], '\t', humanPPV[i], '\t', humanTNR[i], '\t', humanF1[i], '\t', humanMCC[i], '\t',humanInformedness[i], '\n\n' i+=1 sys.stdout = orig_stdout log.close() nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall) def nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall): # fprs = [] # tprs = [] # #for threshold in range(1,1000): # tpr, fpr = nv_digits_infer_one_round(threshold*0.001, softmax1, softmax2) # fprs.append(fpr) # tprs.append(tpr) y_actu = labels y_pred = probs #### CONFUSINO MATRIX #### #cm = ConfusionMatrix(y_actu, y_pred) #print cm #cm.print_stats() #### ROC Curve ##### fpr, tpr, thresholds = roc_curve(y_actu, y_pred, pos_label=True) # The Youden index J (Youden, 1950) is defined as: # J = max { sensitivityc + specificityc - 1 } # where c ranges over all possible criterion values. # Graphically, J is the maximum vertical distance between the ROC curve and the diagonal line. # The criterion value corresponding with the Youden index J is the optimal criterion value only when disease prevalence is 50%, equal weight is given to sensitivity and specificity, and costs of various decisions are ignored. youden = tpr - fpr # = tpr + (1-fpr) -1 = tpr - fpr # Sensitivity + Specificity - 1 best_ind = youden.argmax() best_threshold = thresholds[best_ind] orig_stdout = sys.stdout f = open(model + '.txt', 'w') sys.stdout = f thresh = 0.164 print '\nP-R curve cutoff = ', thresh # Find prediction to the dataframe applying threshold y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False) cm = ConfusionMatrix(y_actu, y_pred_labels) # print cm cm.print_stats() thresh = 0.174 print '\nP-R curve cutoff = ', thresh # Find prediction to the dataframe applying threshold y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False) cm = ConfusionMatrix(y_actu, y_pred_labels) # print cm cm.print_stats() thresh = 0.379 print '\nP-R curve cutoff = ', thresh # Find prediction to the dataframe applying threshold y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False) cm = ConfusionMatrix(y_actu, y_pred_labels) # print cm cm.print_stats() thresh = 0.519 # argmax(F1 score) point for VGG print '\nP-R curve cutoff = ', thresh # Find prediction to the dataframe applying threshold y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False) cm = ConfusionMatrix(y_actu, y_pred_labels) # print cm cm.print_stats() thresh = 0.535 print '\nP-R curve cutoff = ', thresh # Find prediction to the dataframe applying threshold y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False) cm = ConfusionMatrix(y_actu, y_pred_labels) # print cm cm.print_stats() print '\nROC curve optimal cutoff = ', best_threshold # Find prediction to the dataframe applying threshold y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > best_threshold else False) cm = ConfusionMatrix(y_actu, y_pred_labels) # print cm cm.print_stats() # Get FP and FN samples thresh = 0.519 y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False) cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) directoryFP = cwd + '/' + model+'_FP' if os.path.exists(directoryFP): shutil.rmtree(directoryFP) os.mkdir(directoryFP) directoryFN = cwd + '/' + model+'_FN' if os.path.exists(directoryFN): shutil.rmtree(directoryFN) os.mkdir(directoryFN) i=0 with open(FILE_LIST) as f: for line in f: line = line.strip() pred_label = y_pred_labels[i] actu_label = y_actu[i] basename = os.path.basename(line) new_file_name="{0:.3f}".format(y_pred[i]) + "_" +basename if(actu_label == True and pred_label == False): shutil.copyfile(line, directoryFN+"/"+ new_file_name) if(actu_label == False and pred_label == True): shutil.copyfile(line, directoryFP+"/"+ new_file_name) i+=1 roc_auc = auc(fpr, tpr) print 'ROC AUC = ', roc_auc # Use precision recall curve for skewed / unbalanced data precision, recall, pr_thresholds = precision_recall_curve(y_actu, y_pred, pos_label=True) f1 = 2.0*precision*recall/(precision+recall) pr_best_ind = f1.argmax() pr_best_threshold = pr_thresholds[pr_best_ind] print 'precision_recall_curve optimal cutoff ', model, ' = ', pr_best_threshold sys.stdout = orig_stdout f.close() plt.figure() plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.3f)' % (roc_auc)) plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck') # plt.plot(fpr[best_ind], tpr[best_ind], 'ro', label='optimal cutoff') # (probability = %0.3f)' % (best_threshold)) plt.plot(humanFPR, humanTPR, 'bo', label='human experts') plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC Curve (' + model + ') ' ) plt.legend(loc="lower right") plt.savefig(model + "_roc_curve.png") #plt.show() plt.close() # pr_auc=auc(precision, recall) plt.figure() plt.plot(recall, precision, lw=1, label='P-R Curve') plt.plot(humanRecall, humanPrecision, 'bo', label='human experts') # plt.plot(recall[pr_best_ind], precision[pr_best_ind], 'ro') #, label='best threshold (%0.3f)' % (best_threshold)) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.ylabel('Precision') plt.xlabel('Recall') plt.title('Precision-Recall Curve (' + model + ') ' ) plt.legend(loc="lower right") plt.savefig(model + "_pr_curve.png") #plt.show() plt.close() myfile = open(model + "_pr_curve.txt", "w") myfile.write('threshold\trecall\tprecision\n\n') i=0 for th in pr_thresholds: myfile.write(str(th) + str(recall[i]) + '\t'+ str(precision[i]) + '\n') i+=1 nv_digits_infer_test_model('DNN_vs_Human',"/home/zys/Desktop/scripts/human") # Batch inference # nv_digits_infer_test_model3("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet vs Human", "/home/zys/Desktop/scripts/human") # special treatment for GoogLeNet_transfer_learning. It requires nv-caffe-0.15. Use provided probs file