[Bioperl-l] Module: Bio::Structure::IO

Saskia Priesel priesel at caesar.de
Wed Jun 15 02:53:30 EDT 2005


Hello to all,

I have a problem processing a very large number of files from the PDB
(Protein Data Bank). I want to analyse 30000 PDB files. For this I use the
BioPerl module Bio::Structure::IO to read each whole entry. The source code
is given below. My problem is that too many entries end up being held in
memory at once. Is there a method in the module that can handle this?

sub filter_data {
    my $pdb_files_ref = shift;

    my @pdb_files = @$pdb_files_ref;
    #print join ("\n", at pdb_files);

    #initialize variables
    my @file_data = ();
    my $min_length = 0;
    my $max_length = 100;

    for(my $i=0;$i<=$#pdb_files;$i++) {
      my $data = $pdb_files[$i];
      #print "$data\n";

      my $structio = Bio::Structure::IO->new(-file => "$data", '-format'
=> 'pdb');

      my $structure = $structio->next_structure();

      print "Structure",$structure->id,"\n";

      my @chain_list = $structure->get_chains();

      my $length = scalar @chain_list;
      #print "Laenge: $length\n";
      #print "Letztes Element: $chain_list[-1]\n";

      for(my $i=0;$i<=$#chain_list;$i++) {
        my $chain = $chain_list[$i];
        #print "Chain: $chain\n";

        my $chainid = $chain->id;
        #print "Chain: $chainid\n";

        if($chainid =~ m/default/) {
          $pseq = $structure->seqres();
          my $default_seq = $pseq->seq();
          #print "$default_seq\n";
          if(length($default_seq) >= $min_length && length($default_seq)
<= $max_length) {
            if($default_seq =~ m/.*C.*C.*C/i == 0) {
              print "Structure",$structure->id,"\n";
              print "Chain: $chainid\n";
              print "$default_seq\n";
              print "Laenge: ",length($default_seq),"\n";
              }
            }
          next;
          }

        my $pseq = $structure->seqres($chainid);

        if (!$pseq){
          last;
          }

        my $sequence = $pseq->seq();
        #print "$sequence\n";

        if(length($sequence) >= $min_length && length($sequence) <=
$max_length) {
          if($sequence =~ m/.*C.*C.*C/i == 0) {
            print "Structure",$structure->id,"\n";
            print "Chain: $chainid\n";
            print "$sequence\n";
            print "Laenge: ",length($sequence),"\n";
            next;
            }
          }
       }
     }
  }



More information about the Bioperl-l mailing list