[BioPython] NeuralNetworks

JP Glutting jpg@research.dfci.harvard.edu
Fri, 15 Nov 2002 11:07:59 -0500


This is a multi-part message in MIME format.
--------------000600070107050908070401
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

Hi Mike,

I wrote a little script to try to do MHC binding prediction. It does not 
work as a predictor, but it is an example. There are also some good 
tidbits of information in the modules themselves (like different issues 
related to increases in errors in the validation set).

Let me know if you have any questions - I have not looked at this in 
months, but I still remember some of the reasons I set it up this way.

And, if anyone out there really knows something about Neural Networks, I 
would love to get some good feedback.

Cheers,

JP



Mike Sears wrote:
> Can anyone point me to some example code for a simple BPN using the Bio 
> classes provided in Biopython.
> 
> Thanks,
> 
> Mike Sears
> 
> Michael Sears, Ph.D.
> Department of Life Sciences
> Indiana State University
> Terre Haute, IN 47809
> 
> 812-237-9638
> _______________________________________________
> BioPython mailing list  -  BioPython@biopython.org
> http://biopython.org/mailman/listinfo/biopython
> 



--------------000600070107050908070401
Content-Type: text/plain;
 name="test_nnbp.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="test_nnbp.py"

#!/usr/bin/env python
import random

# The 20 standard amino-acid one-letter codes; a residue's position in this
# list defines its one-hot index in the encodings built by rankpeps/mkbaddata.
aas =  ['G','A','V','I','L','M','F','Y','W','H','C','P','K','R','D','E','Q','N','S','T']
# All-zero template vector (length 20), copied fresh for each residue encoding.
datamask = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,]
# Peptides used as the positive training examples (see mkinput).
# NOTE(review): presumably H-2Kb-binding peptides, from the name -- confirm.
kbpeps = ['SIINFEKL', 'HIYEFPQL', 'SNYLFTKL', 'SSLPFQNI', 'RCQIFANI', 'SSWDFITV', 'SSISFCGV', 'LSPFPFDL', 'INFDFPKL', 'QTFDFGRL', 'INYEYAIV', 'TTIHYKYM', 'NAIVFKGL', 'AVYNFATC', 'VYDFFVWL', 'EQYKFYSV', 'AQYKFIYV', 'ANYDFICV', 'VDYNFTIV', 'RGYVYQGL', 'RTYTYEKL', 'RFYRTCKL', 'YAMIYRNL', 'IIYRFLLI', 'SMGIYQIL', 'APGNYPAL', 'KSPWFTTL', 'GVYINTAL', 'ICPMYARV', 'GGPIYRRV', 'GLEEYSAM', 'VYIEVLHL', 'SFIRGTKV', 'VGPRYTNL', 'GAYEFTTL', 'IMIKFNRL', 'QAPGFTYT']
# Second peptide set; not referenced anywhere else in this script.
# NOTE(review): presumably H-2Db-binding peptides, from the name -- confirm.
dbpeps = ['FQPQNGQFI', 'GRPKNGCIV', 'VNIRNCCYI', 'FGISNYCQI', 'ASNENMDAM', 'ASNENMETM', 'KVPRNQDWL', 'EGSRNQDWL', 'TSPRNSTVL', 'GILGFVFTL', 'RPAPGSTAP', 'APGSTAPPA', 'FAPGNYPAL', 'RMFPNAPYL', 'QGINNLDNL', 'ILNHNFCNL', 'TNLLNDRVL', 'AMGVNLTSM', 'CCLCLTVFL', 'CSLWNGPHL', 'CKGVNKEYL', 'SAINNYAQK', 'SQVTNPANI', 'IQVGNTRTI', 'SSVVGVWYL', 'KAVYNFATC', 'RAHYNIVTF', 'FTFPNEFPF', 'FKHINHQVV', 'MHYTNWTHI', 'WMHHNMDLI', 'HAGSLLVFM', 'WSKDNLPNG', 'GQAPGFTYT']

def mknet(opn=1, hn=210, ipn=160):
    """Build a three-layer back-propagation network.

    opn -- number of output nodes
    hn  -- number of hidden nodes
    ipn -- number of input nodes (160 = 8 residues x 20-bit one-hot code)
    """
    from Bio.NeuralNetwork.BackPropagation import Layer
    from Bio.NeuralNetwork.BackPropagation.Network import BasicNetwork
    # Layers are wired back-to-front: each layer is handed its successor.
    out_layer = Layer.OutputLayer(opn)
    hid_layer = Layer.HiddenLayer(hn, out_layer)
    in_layer = Layer.InputLayer(ipn, hid_layer)
    return BasicNetwork(in_layer, hid_layer, out_layer)

def mkinput():
    """Assemble training data and return a loaded ExampleManager.

    Positive examples are the encoded kbpeps peptides (target 1); negative
    examples are 100 randomly-encoded 8-mers (target 0). The manager splits
    the shuffled pool 40% training / 40% validation / remainder test.
    """
    # NOTE: the previously unused `import sys` was removed.
    from Bio.NeuralNetwork.Training import TrainingExample
    from Bio.NeuralNetwork.Training import ExampleManager
    ipdata = rankpeps(kbpeps, 1)        # positives, target output [1]
    ipdata += mkbaddata(8, 100)         # random negatives, target output [0]
    # Shuffle so the train/validation/test split is not ordered by class.
    random.shuffle(ipdata)
    examples = [TrainingExample(inputs, outputs) for inputs, outputs in ipdata]
    manager = ExampleManager(training_percent=0.4, validation_percent=0.4)
    manager.add_examples(examples)
    return manager

def rankpeps(peps, value=1):
    """Encode peptides as flat one-hot bit vectors paired with a target value.

    peps  -- sequence of peptide strings; every residue must appear in `aas`
    value -- target output stored with each encoded peptide (default 1)

    Returns a list of [input_vector, [value]] pairs in sorted-peptide order.
    Each residue contributes 20 bits (one-hot over `aas`), concatenated into
    a single flat list per peptide.

    Fix: the original called peps.sort(), silently re-ordering the caller's
    list (the module constant kbpeps, in practice). Iterating sorted(peps)
    keeps the output order identical without mutating the argument.
    """
    outdata = []
    for pep in sorted(peps):
        tempdata = []
        for r in pep:
            pcode = list(datamask)       # fresh all-zero 20-bit mask
            pcode[aas.index(r)] = 1      # set the bit for this residue
            tempdata += pcode
        outdata.append([tempdata, [value]])
    return outdata

def mkstop(max_iter=200, min_iter=50, verbose=1):
    """Return a stopper that halts training once validation error increases.

    max_iter -- hard upper bound on training iterations
    min_iter -- minimum iterations before stopping is considered
    verbose  -- passed through to ValidationIncreaseStop
    """
    from Bio.NeuralNetwork import StopTraining
    return StopTraining.ValidationIncreaseStop(max_iter, min_iter, verbose)

def mkbaddata(plen=8, piter=100):
    """Generate random peptide encodings to serve as negative examples.

    plen  -- residues per peptide (default 8, matching the kbpeps 8-mers)
    piter -- number of examples to generate

    Returns a list of [input_vector, [0]] pairs; each residue position is a
    random one-hot 20-bit vector. (Cleanup: dead commented-out code removed
    and copy(datamask) replaced by the equivalent list(datamask), dropping
    the function-local copy import.)
    """
    outdata = []
    for _ in range(piter):
        tempdata = []
        for _ in range(plen):
            pcode = list(datamask)                # fresh all-zero mask
            pcode[random.randrange(0, 20)] = 1    # random one-hot residue
            tempdata += pcode
        outdata.append([tempdata, [0]])
    return outdata
        
def demo():
    network = mknet()
    manager = mkinput()
    stopper = mkstop()
    # (training examples, validation examples, stop function, learning rate, momentum)
    network.train(manager.train_examples, manager.validation_examples, stopper.stopping_criteria, 0.6, 0.5)
    for test_example in manager.test_examples:
        prediction = network.predict(test_example.inputs)
        print "expected %s, got %s" %(test_example.outputs, prediction)

# Run the demo when executed as a script (not on import).
if __name__ == "__main__":
    demo()

--------------000600070107050908070401--