from Bio import LogisticRegression
from Bio import kNN
import math

def model_accuracy(cmethod, x_var, y_var, Etext, classifier=None):
	'''Calculate and print model accuracy statistics for a classifier.

	Parameters:
		cmethod    -- 'LR' selects the module-level logistic regression
		              model (lrmodel); any other value selects the
		              k-nearest-neighbour model (kNNmodel).  Ignored when
		              classifier is supplied.
		x_var      -- sequence of test inputs (feature vectors).
		y_var      -- sequence of true class labels (1 = positive class).
		Etext      -- comment string shown in the printed report header.
		classifier -- optional callable mapping one feature vector to a
		              predicted label; when given it overrides cmethod.

	Returns the list of predicted labels, one per element of x_var
	(zip truncates to the shorter of x_var / y_var).

	Statistics whose denominator is zero (e.g. sensitivity when there are
	no actual positives) are reported as None rather than raising.
	'''
	TP = 0.0  # true positives
	TN = 0.0  # true negatives
	FP = 0.0  # false positives
	FN = 0.0  # false negatives
	pred_list = []
	for x_pred, y_pred in zip(x_var, y_var):
		if classifier is not None:
			prediction = classifier(x_pred)
		elif cmethod != 'LR':
			prediction = kNN.classify(kNNmodel, x_pred)
		else:
			prediction = LogisticRegression.classify(lrmodel, x_pred)
		pred_list.append(prediction)
		# Tally the confusion matrix; these four cases are exhaustive,
		# so the original unreachable error branch has been removed.
		if y_pred == 1 and prediction == 1:
			TP += 1
		elif y_pred == 1:
			FN += 1
		elif prediction == 1:
			FP += 1
		else:
			TN += 1
	TotalObs = TP + TN + FP + FN
	# Guard every ratio against a zero denominator; None marks "undefined".
	Prevalence = (TP + FN) / TotalObs if TotalObs > 0 else None
	CorrectClass = (TP + TN) / TotalObs if TotalObs > 0 else None
	Sens = TP / (TP + FN) if (TP + FN) > 0 else None
	Spec = TN / (TN + FP) if (TN + FP) > 0 else None
	PosPrecision = TP / (TP + FP) if (TP + FP) > 0 else None
	NegPrecision = TN / (TN + FN) if (TN + FN) > 0 else None
	# Matthews correlation coefficient; undefined when any margin is empty.
	Dcorr = math.sqrt((TP + FN) * (TP + FP) * (TN + FN) * (TN + FP))
	Correlation = ((TP * TN) - (FP * FN)) / Dcorr if Dcorr != 0 else None
	# Single-argument print() behaves identically under Python 2 and 3.
	print('')
	print("Model Accuracy Statistics: %s" % (Etext,))
	print('Number of True Positives= %s' % (TP,))
	print('Number of True Negatives= %s' % (TN,))
	print('Number of False Positives= %s' % (FP,))
	print('Number of False Negatives= %s' % (FN,))
	print('Prevalence (proportion True Positives out of all observations)= %s' % (Prevalence,))
	print('Correct Classification Rate= %s' % (CorrectClass,))
	print('Sensitivity (proportion of True positives that were correctly predicted)= %s' % (Sens,))
	print('Specificity (proportion of True negatives that were correctly predicted)= %s' % (Spec,))
	print('Matthews Correlation Coefficient= %s' % (Correlation,))
	print('Positive Precision (proportion of predicted positives that are true positives)= %s' % (PosPrecision,))
	print('Negative Precision (proportion of predicted negatives that are true negatives)= %s' % (NegPrecision,))
	print('')

	return pred_list



# Training data: each row is [feature1, feature2] for one gene pair,
# with ys giving the matching class label (1 = operon pair, 0 = not).
xs = [
	[-53, -200.78], [117, -267.14], [57, -163.47], [16, -190.30],
	[11, -220.94], [85, -193.94], [16, -182.71], [15, -180.41],
	[-26, -181.73], [58, -259.87], [126, -414.53], [191, -249.57],
	[113, -265.28], [145, -312.99], [154, -213.83], [147, -380.85],
	[93, -291.13],
]
ys = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]

kvalue = 3  # number of nearest neighbors
lrmodel = LogisticRegression.train(xs, ys)
kNNmodel = kNN.train(xs, ys, kvalue)

# Evaluate each model on its own training data and show the predictions.
lr_preds = model_accuracy(cmethod='LR', x_var=xs, y_var=ys, Etext='Logistic regression model')
print(lr_preds)

knn_preds = model_accuracy(cmethod='kNN', x_var=xs, y_var=ys, Etext='k-Nearest Neighbor model')
print(knn_preds)


genepairs=['yxcE - yxcD', 'yxiB - yxiA']
testcases=[[6, -173.143442352], [309, -271.005880394]]

for tc, gp in zip(testcases, genepairs):
	pLR=LogisticRegression.classify(lrmodel, tc)
	pNN=kNN.classify(kNNmodel, tc)
	print 'Gene Pair: ', gp, ': Predicted to belong in OP class by logistic regression? ', pLR==1
	print 'Gene Pair: ', gp, ': Predicted to belong in OP class by k-nearest neighbor? ', pNN==1

