[Bioperl-l] some contigs do not work for sequence retrievel

Hermann Norpois hnorpois at googlemail.com
Tue May 8 12:16:28 EDT 2012


Hello,

for getting a sequence 5 prime upstream of TTS I wrote a script that works
for some geneids but not for all. I always get a contig and coordinates. I
do not have an idea why I do not get a sequence ( I only get fasta
headers). Actually the sequence ID should be out of importance if I see
that a contig is detected. Has anybody an idea?

Thanks
Hermann Norpois


#!/bin/perl -w
use strict;
use Bio::DB::EntrezGene;
use Bio::SeqIO;
use Bio::DB::GenBank;

my $id = "12064"; #Works with geneid 18619 (Penk1) but not with 54161
(copg) or 12064 (bdnf)

my $seqio_obj = Bio::SeqIO->new(-file => ">bdnf.fasta", -format => 'fasta'
);

my $db = new Bio::DB::EntrezGene;

my $seq = $db->get_Seq_by_id($id);

my $ac = $seq->annotation;

for my $ann ($ac->get_Annotations('dblink')) {
    if ($ann->database eq "Evidence Viewer") {
                # get the sequence identifier, the start, and the stop
        my ($contig,$from,$to) = $ann->url =~
          /contig=([^&]+).+from=(\d+)&to=(\d+)/;
                 my $chr_start = $from-700;
                 my $chr_stop = $from;
               #  my $strand = 1;
                print "CONTIG:\t$contig\tFROM\t$from\tTO\t$to\n\tFETCHING
SEQUENCE FROM\t$chr_start\tTO\t$chr_stop\n"; # Control that something was
detected.
                my $gb  = Bio::DB::GenBank->new(-format     => 'fasta',
                                -seq_start  => $chr_start,
                                -seq_stop   => $chr_stop,
                #               -strand     => $strand
                #               -complexity => 1
                                );
              #  $gb->request_format('fasta');
                my $obj = $gb->get_Seq_by_id($contig);

                $seqio_obj->write_seq($obj);

    }
}



More information about the Bioperl-l mailing list