[Bioperl-l] from protein gi number to CDS seqs.

Sun, Jian jsun at utdallas.edu
Tue Sep 27 13:09:28 EDT 2005


Dear all, I got some reference code from this board as attached below to get the CDS sequence from protein gi number. But for most of my protein gi #, I got the erroe message. Could you please help me to point out what's the problem is?
 
Thanks in advance.
Jian Sun
 
The codes I got from the bBioperl Board are :
 
#!C:\Perl\bin\perl.exe
use lib "C:\Perl\lib";
use strict;
use Bio::DB::GenBank;
use Bio::DB::GenPept;
use Bio::DB::FileCache;
use Bio::Factory::FTLocationFactory;
use Bio::SeqFeature::Generic;
 
my $ntdb = new Bio::DB::GenBank;
my $pepdb= new Bio::DB::GenPept;
 
# do some caching in the event you're pulling up the same
# chromosome and/or you are debugging
my $cachent = new Bio::DB::FileCache(-kept => 1,
                                    -file => './tmp/nt.idx',
                                    -seqdb => $ntdb);
 
my $cachepep = new Bio::DB::FileCache(-kept => 1,
                                     -file => './tmp/pep.idx',
                                     -seqdb => $pepdb);
 
# obj to turn strings into Bio::Location object
my $locfactory = new Bio::Factory::FTLocationFactory;
 
# you might get these from a file (and they can be accessions too)
# protein gi number (10956263)
my @protgis = (23172508);  
 
foreach my $gi ( @protgis ) {    #id
  my $protseq = $cachepep->get_Seq_by_id($gi);
  if( ! $protseq ) { print STDERR "could not find a seq for gi:$gi\n";
                    next;
                   }
  foreach my $cds (  grep { $_->primary_tag eq 'CDS' }
                          $protseq->get_SeqFeatures() )
  {
     next unless( $cds->has_tag('coded_by') ); # skip CDSes with no coded_by
     my ($codedby) = $cds->each_tag_value('coded_by');
     my ($ntacc,$loc) = split(/\:/, $codedby);
     $ntacc =~ s/(\.\d+)//; # genbank wants an accession not a versioned one
     my $cdslocation = $locfactory->from_string($loc);
     my $cdsfeature = new Bio::SeqFeature::Generic(-location => $cdslocation);
     my $ntseq = $cachent->get_Seq_by_acc($ntacc);
     next unless $ntseq;
     $ntseq->add_SeqFeature($cdsfeature); # locate the feature on a seq
     my $cdsseq = $cdsfeature->spliced_seq();
     print "cds seq is : \n", $cdsseq->seq(), "\n";
     my $startP = (109-1) * 3 + 1;
     my $endP = (116-1) * 3 + 1 ;
     my $mypiece = $cdsseq->subseq($startP,$endP);
     print "\n my seq. piece is: ", $mypiece;
 }
}
******************************************************************
And the error message I got is:
 

----------EXCEPTION-------------------------------

MSG: acc does not exist

STACK BIO::DB::WebDBSeqI::get_Seq_by acc  C:/perl/site/Lib/Bio/DB/WebDBSeqI.pm :176.

STACK BIO::DB::WebDBSeqI::get_Seq_by acc  C:/perl/site/Lib/Bio/DB/WebDBSeqI.pm :170.

 

 --------------------------------------

 

 



More information about the Bioperl-l mailing list