from Bio import LogisticRegression from Bio import kNN import math def model_accuracy(cmethod, x_var, y_var, Etext): ''' Calculate model accuracy statistics for a logistic regression of k-nearest Neighbour using test input and test output with Etext being just a comment string to display. Returns a list of predicted value.''' TP=0.0 # Number of True Positives TN=0.0 # Number of True Negatives FP=0.0 # Number of False Positives FN=0.0 # Number of False Negatives pred_list=[] for x_pred, y_pred in zip(x_var, y_var): if cmethod != 'LR': prediction=kNN.classify(kNNmodel, x_pred) else: prediction=LogisticRegression.classify(lrmodel, x_pred) pred_list.append(prediction) if y_pred ==1 and prediction ==1: TP += 1 elif y_pred ==1 and prediction !=1: FN += 1 elif y_pred != 1 and prediction ==1: FP += 1 elif y_pred != 1 and prediction !=1: TN += 1 else: print 'Error: unexpected prediction ', prediction, ' for ', y_pred TotalObs=TP+TN+FP+FN Prevalence=(TP+FN)/TotalObs CorrectClass=((TP+TN)/TotalObs) if (TP+FN) > 0: Sens=TP/(TP+FN) if (TN+FP) > 0: Spec=TN/(TN+FP) Ncorr=((TP*TN)-(FP*FN)); Dcorr=math.sqrt((TP+FN)*(TP+FP)*(TN+FN)*(TN+FP)) if Dcorr != 0: Correlation=Ncorr/Dcorr if (TP+FP)> 0: PosPrecison=TP/(TP+FP) if (TN+FN) >0: NegPrecison=TN/(TN+FN) print print "Model Accuracy Statistics: %s" % (Etext) print 'Number of True Positives=', TP print 'Number of True Negatives=', TN print 'Number of False Positives=', FP print 'Number of False Negatives=', FN print 'Prevalence (proportion True Positives out of all observations)=', Prevalence print 'Correct Classification Rate=', CorrectClass print 'Sensitivity (proportion of True positives that were correctly predicted)=', Sens print 'Specificity (proportion of True negatives that were correctly predicted)=', Spec print 'Matthews Correlation Coefficient=', Correlation print 'Positive Precision (proportion of predicted positives that are true positives)=', PosPrecison print 'Negative Precision (proportion of predicted negatives that are true negatives)=', NegPrecison print return pred_list xs = [[-53, -200.78], [117, -267.14], [57, -163.47], [16, -190.30], [11, -220.94], [85, -193.94], [16, -182.71], [15, -180.41], [-26, -181.73], [58, -259.87], [126, -414.53], [191, -249.57], [113, -265.28], [145, -312.99], [154, -213.83], [147, -380.85], [93, -291.13]] ys = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0] lrmodel = LogisticRegression.train(xs, ys) kvalue=3 # number of nearest neighbors kNNmodel = kNN.train(xs, ys, kvalue) lr_preds=model_accuracy(cmethod='LR', x_var=xs, y_var=ys, Etext='Logistic regression model') print lr_preds knn_preds=model_accuracy(cmethod='kNN', x_var=xs, y_var=ys, Etext='k-Nearest Neighbor model') print knn_preds genepairs=['yxcE - yxcD', 'yxiB - yxiA'] testcases=[[6, -173.143442352], [309, -271.005880394]] for tc, gp in zip(testcases, genepairs): pLR=LogisticRegression.classify(lrmodel, tc) pNN=kNN.classify(kNNmodel, tc) print 'Gene Pair: ', gp, ': Predicted to belong in OP class by logistic regression? ', pLR==1 print 'Gene Pair: ', gp, ': Predicted to belong in OP class by k-nearest neighbor? ', pNN==1