[Bioperl-l] Possible Repeat E-Mail

Warren Gallin wgallin at ualberta.ca
Wed Apr 16 05:37:57 UTC 2014


Jason,

	My previous message bounced, presumably because I included an attachment.

	On the chance that it did not make it through, here is the relevant test case:

A script called Test_Script.pl is as follows:

____________________________________________________________________

#!/usr/bin/perl 

use strict;
use warnings;
use DBI;
use Bio::Seq;
use Bio::DB::EUtilities;
use Bio::SeqIO;
use Bio::Seq;
use Data::Printer;
use Bio::DB::GenBank;

# Reproduction case: fetch a single GenBank record by gi number from the
# 'protein' database, save it to a local file, parse it, and for every CDS
# feature print an SQL UPDATE statement containing that feature's spliced
# sequence.
my $gi = 302393575;  #This gi number is for the protein record of a horse ion channel
my $spliced_cds;
my $na_seq;
# %na_vkcnt_id is declared but never used anywhere in this script.
my %na_vkcnt_id;

#Create a database handle to GENBANK for retrieving coding sequences
# This handle is only used as the argument to spliced_seq() further down.

my $gb_db = Bio::DB::GenBank->new();


#create a structure for fetching protein records from GENBANK
# NOTE(review): the -email value contains " at " instead of "@" -- presumably
# obfuscated by the mailing-list archive; confirm the real address before
# running this script.

my $factory = Bio::DB::EUtilities->new(
    -eutil   => 'efetch',
    -db      => 'protein',
    -rettype => 'gb',
    -email   => 'wgallin at ualberta.ca',
    -id      => $gi
);

# The efetch response is directed to this file, which is then read back in
# by Bio::SeqIO below.
my $holding_file = 'protein_records.gb';

$factory->get_Response( -file => $holding_file );

my $seqin = Bio::SeqIO->new(
    -file   => $holding_file,
    -format => 'genbank'
);

# Walk every record in the downloaded file and every feature on each record.
while ( my $seq = $seqin->next_seq ) {
    # $na_acc_gennt is never assigned or read; $hit_gi is assigned but never
    # read afterwards.
    my $na_acc_gennt;
    my $hit_gi = $seq->primary_id;

    for my $feature_object ( $seq->get_SeqFeatures ) {

        if ( $feature_object->primary_tag eq "CDS" ) {
            # NOTE(review): $seq was fetched with -db => 'protein', so this CDS
            # feature belongs to a protein record. spliced_seq() presumably
            # extracts from the sequence the feature is attached to, which
            # would explain amino acids being printed instead of nucleotides
            # -- confirm against the Bio::SeqFeatureI documentation. The
            # nucleotide source is presumably named in the feature's
            # 'coded_by' tag -- verify.
            # NOTE(review): the handle is passed positionally; the warning in
            # the quoted output ("please use '-db' or '-nosort' for args")
            # presumably refers to this call -- confirm.
            $spliced_cds = $feature_object->spliced_seq($gb_db);
            $na_seq      = $spliced_cds->seq;

        # Only prints the statement to STDOUT; no database connection is
        # opened in this script.
        # NOTE(review): the UPDATE has no WHERE clause, so if it were executed
        # as printed it would apply to every row of gennt -- confirm intent.
        print "UPDATE gennt SET cds = \"$na_seq\" ;\n";
		}
    }
}
exit;

________________________________________________________

When I run it I get the following output:


_________________________________________________________

warrenglinsmbp2:140414_Update_Flawed_gennt_Entries wgallin$ perl Test_Script.pl

--------------------- WARNING ---------------------
MSG: API has changed; please use '-db' or '-nosort' for args. See POD for more details.
---------------------------------------------------
UPDATE gennt SET cds = "MPVRRGHVAPQNTFLDTIIRKFEGQSRKFIIANARVENCAVIYCNDGFCELCGYSRAEVMQRPCTCDFLHGPRTQRRAAAQIAQALLGAEERKVEISFYRKDGSCFLCLVDVVPVKNEDGAVIMFILNFEVVMEKDMVGSPARDTNHRGPPTSWLATGRAKTFRLKLPALLALTARESTVRPGGAGSTGAPGAVVVDVDLTPAAPSSESLALDEVTAMDNHVAGLGPAEERRALVGPGSPPACAPIPHPSPRAHSLNPDASGSSCSLARTRSRESCASVRRASSADDIEAMRTGLPPPPRHASTGAMHPLRSGLLNSTSDSDLVRYRTISKIPQITLNFVDLKGDPFLASPTSDREIIAPKIKERTHNVTEKVTQVLSLGADVLPEYKLQAPRIHRWTILHYSPFKAVWDWLILLLVIYTAVFTPYSAAFLLKETEEGPPATDCGYACQPLAVVDLIVDIMFIVDILINFRTTYVNANEEVVSHPGRIAVHYFKGWFLIDMVAAIPFDLLIFGSGSEELIGLLKTARLLRLVRVARKLDRYSEYGAAVLFLLMCTFALIAHWLACIWYAIGNMEQPHMDSRIGWLHNLGDQIGKPYNSSGLGGPSIKDKYVTALYFTFSSLTSVGFGNVSPNTNSEKIFSICVMLIGSLMYASIFGNVSAIIQRLYSGTARYHTQMLRVREFIRFHQIPNPLRQRLEEYFQHAWSYTNGIDMNAVLKGFPECLQADICLHLNRSLLQHCKPFRGATKGCLRALAMKFKTTHAPPGDTLVHAGDLLTALYFISRGSIEILRGDVVVAILGKNDIFGEPLNLYARPGKSNGDVRALTYCDLHKIHRDDLLEVLDMYPEFSDHFWSSLEITFNLRDTNMIPGSPGSTELEGGFNRQRKRKLSFRRRTDKDPEQPGEVSALGPGRAGAGPSSRGRPGGPWGESPSSGPSSPESSEDEGPGRSSSPLRLVPFSSPRPPGEPPGGEPLIEDCEKSSDTCNPLSGAFSGVSNIFSFWGDSRGRQYQELPRCPAPAPSLLNIPLSSPGRRPRGDVESRLDALQRQLNRLETRLSADMATVLQLLQRQMTLVPPAYSAVTTPGPGPTSTSPLLPVSPIPTLTLDSLSQVSQFMACEELPPGAPELPQDGPTRRLSLPGQLGALTSQPLHRHGSDPGS" ;
warrenglinsmbp2:140414_Update_Flawed_gennt_Entries wgallin$ 


___________________________________________________________

The problem is that the sequence is being returned as the translated CDS (amino acids) rather than as the nucleotide sequence of the CDS.

This happens with every gi number that I have tried.

I feel like I am missing some subtle bit of BioPerl wisdom, but I cannot figure out what that is.

Thanks for looking at this.

Warren Gallin







More information about the Bioperl-l mailing list