[Biopython] Best way to change the chain identifiers of a set of residues

Thu Nov 26 08:54:47 UTC 2015

You can pass a quick class to the IO method that takes residue objects:

def trimByContinuityLimit(pdb_file,min_size):
    parser=PDBParser()
    structure=parser.get_structure(pdb_file[:-4],pdb_file)
    residues=Selection.unfold_entities(structure,'R')
    list_id="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890"
    dictio_chainid={}
    residues_to_remove=[]
    current_listres=[]
    index=0
    for i in range(len(residues)-1):
        res1=residues[i]
        res2=residues[i+1]
        id1=res1.id
        id2=res2.id
        check=Bioinformatics.checkContinuity(res1,res2)
        #print 'check',check
        #print 'list_id[index]',list_id[index]
        if check==True:
            #print "These two residues are consecutive",res1,res2
            if id1 not in current_listres:
                current_listres.append(id1)
            dictio_chainid[id1]=list_id[index]
            if id2 not in current_listres:
                current_listres.append(id2)
            dictio_chainid[id2]=list_id[index]
            #print 'list_id[index]',list_id[index]
            #print 'id1,dictio_chainid[id1]',dictio_chainid[id1],id1
            #print 'id2,dictio_chainid[id2]',dictio_chainid[id2],id2
        elif check==False:
            #print "These two residues are not consecutive",res1,res2
            if id1 not in current_listres:
               current_listres.append(id1)
               dictio_chainid[id1]=list_id[index]
            if len(current_listres)<min_size:
               residues_to_remove.extend(current_listres)
            if i==len(residues)-2 and min_size>1: # If we reach this point, then the last residue is not continuous so it is single :
               residues_to_remove.append(id2)
            else:
               current_listres=[]
               current_listres.append(id2)
               index=index+1
               dictio_chainid[id2]=list_id[index]
	class MyAccept(Select):
		def accept_residue(self, residue):
			if residue.id not in residues_to_remove:
                return 1
            else:
                return 0

    io=PDBIO()
    io.set_structure(structure)
    io.save(pdb_file[:-4]+'_trimmed.pdb',MyAccept())

> On Nov 25, 2015, at 12:16 PM, Claudia Millán Nebot <cmncri at ibmb.csic.es> wrote:
> 
> Dear all,
> 
> I am writing a function that examines a structure, and if there are discontinuous regions that are smaller than a certain size, they will be removed from the structure. Then, I would like to write the structure as a pdb in which the chain identifiers are different for each discontinuous fragment. For that purpose, I want to change the chain id of certain residues. ¿What will be the best way to do it? Because right now it is not working, of course, because I am iterating over something that I am trying to change at the same time. Maybe I am missing something very obvious or straightforward, but I do not see what will be the best way to do it... ¿Maybe creating and empty chain and using the set_parent method?
> 
> The current code looks like this:
> def trimByContinuityLimit(pdb_file,min_size):
>     parser=PDBParser()
>     structure=parser.get_structure(pdb_file[:-4],pdb_file)
>     residues=Selection.unfold_entities(structure,'R')
>     list_id="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890"
>     dictio_chainid={}
>     residues_to_remove=[]
>     current_listres=[]
>     index=0
>     for i in range(len(residues)-1):
>         res1=residues[i]
>         res2=residues[i+1]
>         id1=res1.id <http://res1.id/>
>         id2=res2.id <http://res2.id/>
>         check=Bioinformatics.checkContinuity(res1,res2)
>         #print 'check',check
>         #print 'list_id[index]',list_id[index]
>         if check==True:
>             #print "These two residues are consecutive",res1,res2
>             if id1 not in current_listres:
>                 current_listres.append(id1)
>             dictio_chainid[id1]=list_id[index]
>             if id2 not in current_listres:
>                 current_listres.append(id2)
>             dictio_chainid[id2]=list_id[index]
>             #print 'list_id[index]',list_id[index]
>             #print 'id1,dictio_chainid[id1]',dictio_chainid[id1],id1
>             #print 'id2,dictio_chainid[id2]',dictio_chainid[id2],id2
>         elif check==False:
>             #print "These two residues are not consecutive",res1,res2
>             if id1 not in current_listres:
>                current_listres.append(id1)
>                dictio_chainid[id1]=list_id[index]
>             if len(current_listres)<min_size:
>                residues_to_remove.extend(current_listres)
>             if i==len(residues)-2 and min_size>1: # If we reach this point, then the last residue is not continuous so it is single :
>                residues_to_remove.append(id2)
>             else:
>                current_listres=[]
>                current_listres.append(id2)
>                index=index+1
>                dictio_chainid[id2]=list_id[index]
> 
>     io=PDBIO()
>     io.set_structure(structure)
>     io.save(pdb_file[:-4]+'_trimmed.pdb',write_end=False)
> 
> Thanks in advance :)
> 
> Claudia
> _______________________________________________
> Biopython mailing list  -  Biopython at mailman.open-bio.org
> http://mailman.open-bio.org/mailman/listinfo/biopython

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.open-bio.org/pipermail/biopython/attachments/20151126/4b5831e8/attachment-0001.html>