From cjfields at dev.open-bio.org Thu Oct 2 17:04:10 2008
From: cjfields at dev.open-bio.org (Christopher John Fields)
Date: Thu, 2 Oct 2008 17:04:10 -0400
Subject: [Bioperl-guts-l] [14914] bioperl-live/trunk/Bio/AlignIO/xmfa.pm:
clean up XMFA parsing, allow spaces in parsing (ende++, from IRC)
Message-ID: <200810022104.m92L4Ako031630@dev.open-bio.org>
Revision: 14914
Author: cjfields
Date: 2008-10-02 17:04:09 -0400 (Thu, 02 Oct 2008)
Log Message:
-----------
clean up XMFA parsing, allow spaces in parsing (ende++, from IRC)
Modified Paths:
--------------
bioperl-live/trunk/Bio/AlignIO/xmfa.pm
Modified: bioperl-live/trunk/Bio/AlignIO/xmfa.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-09-27 05:12:08 UTC (rev 14913)
+++ bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-10-02 21:04:09 UTC (rev 14914)
@@ -78,94 +78,35 @@
my ($width) = $self->_rearrange([qw(WIDTH)], at _);
$self->width($width || $WIDTH);
- my ($start, $end, $strand, $name, $seqname, $seq, $seqchar, $entry,
- $tempname, $tempdesc, %align, $desc, $maxlen, $extra);
+ my ($name, $tempname, $seqchar);
my $aln = Bio::SimpleAlign->new();
-
+ my $seqs = 0;
# alignments
- while (defined ($entry = $self->_readline) ) {
+ while (defined (my $entry = $self->_readline) ) {
chomp $entry;
if ( index($entry, '=') == 0 ) {
- if ($entry =~ m{score\s*=\s*(\d+)}) {
+ if (defined $name && $seqchar) {
+ my $seq = $self->_process_seq($name, $seqchar);
+ $aln->add_seq($seq);
+ }
+ if ($aln && $entry =~ m{score\s*=\s*(\d+)}) {
$aln->score($1);
}
last;
- }
- if ( $entry =~ s{^>(.*)$}{} ) {
- $tempname = $1;
- chomp($entry);
- $tempdesc = $entry;
+ } elsif ( $entry =~ m{^>.+$}xms) {
if ( defined $name ) {
- # put away last name and sequence
- if ( $name =~ m{\d+:(\d+)-(\d+)\s([+-]{1})(?:\s+(\S+)\s*(.*))?} ) {
- ($start, $end, $seqname, $extra) = ($1, $2, $4, $5);
- $strand = ($3 eq '+') ? 1 :
- ($3 eq '-') ? -1 :
- 0;
- } else {
- $self->throw("Does not comform to XMFA format");
- }
- $seq = Bio::LocatableSeq->new(
- -strand => $strand,
- -seq => $seqchar,
- -display_id => $seqname,
- -description => $extra,
- -start => $start,
- -end => $end,
- );
+ my $seq = $self->_process_seq($name, $seqchar);
$aln->add_seq($seq);
- $self->debug("Reading $seqname\n");
}
- $desc = $tempdesc;
- $name = $tempname;
- $desc = $entry;
- $seqchar = "";
- next;
+ $seqchar = '';
+ $name = $entry;
+ } else {
+ $seqchar .= $entry;
}
- $seqchar .= $entry;
}
- # Next two lines are to silence warnings that
- # otherwise occur at EOF when using <$fh>
- $name = "" if (!defined $name);
- $seqchar="" if (!defined $seqchar);
-
- # Put away last name and sequence
- if ( $name =~ m{\d+:(\d+)-(\d+)\s([+-]{1})\s+(\S+)\s*(.*)} ) {
- ($start, $end, $seqname, $extra) = ($1, $2, $4, $5);
- $strand = ($3 eq '+') ? 1 :
- ($3 eq '-') ? -1 :
- 0;
- }
-
- # If $end <= 0, we have either reached the end of
- # file in <> or we have encountered some other error
- if ( !defined $end || $end <= 0 ) {
- undef $aln;
- return $aln;
- }
-
- # This logic now also reads empty lines at the
- # end of the file. Skip this is seqchar and seqname is null
- unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
- $seq = Bio::LocatableSeq->new(-seq => $seqchar,
- -strand => $strand,
- -display_id => $seqname,
- -description => $extra,
- -start => $start,
- -end => $end,
- );
- $aln->add_seq($seq);
- $self->debug("Reading $seqname\n");
- }
- my $alnlen = $aln->length;
- foreach my $seq ( $aln->each_seq ) {
- if ( $seq->length < $alnlen ) {
- my ($diff) = ($alnlen - $seq->length);
- $seq->seq( $seq->seq() . "-" x $diff);
- }
- }
- return $aln;
+ return $aln if $aln->no_sequences;
+ return;
}
=head2 write_aln
@@ -258,4 +199,28 @@
return $self->{'_width'} || $WIDTH;
}
+####### PRIVATE #######
+
+sub _process_seq {
+ my ($self, $entry, $seq) = @_;
+ my ($start, $end, $strand, $seqname, $desc, $all);
+ # put away last name and sequence
+ if ( $entry =~ m{^>\s*\d+:(\d+)-(\d+)\s([+-]{1})(?:\s+(\S+)\s*(\S\.*)?)?} ) {
+ ($start, $end, $seqname, $desc) = ($1, $2, $4, $5);
+ $strand = ($4 eq '+') ? 1 : -1;
+ } else {
+ $self->throw("Line does not comform to XMFA format:\n$entry");
+ }
+ my $seqobj = Bio::LocatableSeq->new(
+ -strand => $strand,
+ -seq => $seq,
+ -display_id => $seqname,
+ -description => $desc || $all,
+ -start => $start,
+ -end => $end,
+ );
+ $self->debug("Reading $seqname\n");
+ return $seqobj;
+}
+
1;
From bugzilla-daemon at portal.open-bio.org Fri Oct 3 10:51:57 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Fri, 3 Oct 2008 10:51:57 -0400
Subject: [Bioperl-guts-l] [Bug 2610] New: Fastq module next_seq() entry
split problem
Message-ID:
http://bugzilla.open-bio.org/show_bug.cgi?id=2610
Summary: Fastq module next_seq() entry split problem
Product: BioPerl
Version: 1.5 branch
Platform: PC
OS/Version: Linux
Status: NEW
Severity: normal
Priority: P2
Component: Bio::SeqIO
AssignedTo: bioperl-guts-l at bioperl.org
ReportedBy: robert.davey at bbsrc.ac.uk
I parse a fastq format file using the following code.
my $stream = Bio::SeqIO->newFh(-format => 'fastq', -fh => \*CF);
my $qualout = Bio::SeqIO->newFh(-format => 'qual', -fh => \*QF);
my $fastaout = Bio::SeqIO->newFh(-format => 'fasta', -fh => \*OF);
while (<$stream>) {
print $fastaout $_;
print $qualout $_;
}
This works fine for most instances, but only if the quality score line doesn't
start with a '@' symbol. Most quality lines look like the following, starting
with a '!' symbol:
+CBS432-11f16.p1k bases 34 to 1019
!::A>>CIIIIIIIITTTYTNNIIHCCCDDDIIIN
However, one of my entries starts thus:
+CBS432-11f16.q1k bases 1 to 879
@9 at BIGGFINNNNTIFIFIIFFFIIIIIIIFDD
In this instance, because next_seq() splits entries on '\n\@', the parser
thinks that the @ denotes a new entry, and the regex fails, throwing an "Can't
parse fastq entry" error.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Fri Oct 3 11:43:13 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Fri, 3 Oct 2008 11:43:13 -0400
Subject: [Bioperl-guts-l] [Bug 2610] Fastq module next_seq() entry split
problem
In-Reply-To:
Message-ID: <200810031543.m93FhDbX027798@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2610
cjfields at bioperl.org changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution| |INVALID
------- Comment #1 from cjfields at bioperl.org 2008-10-03 11:43 EST -------
See bug 2335. Already fixed in main trunk, recommebding an update from
subversion.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From lstein at dev.open-bio.org Fri Oct 3 13:13:46 2008
From: lstein at dev.open-bio.org (Lincoln Stein)
Date: Fri, 3 Oct 2008 13:13:46 -0400
Subject: [Bioperl-guts-l] [14915] bioperl-live/trunk/Bio/Graphics: apply
HTML escaping to imagemap title attributes rather than URL escaping
Message-ID: <200810031713.m93HDkkN007471@dev.open-bio.org>
Revision: 14915
Author: lstein
Date: 2008-10-03 13:13:44 -0400 (Fri, 03 Oct 2008)
Log Message:
-----------
apply HTML escaping to imagemap title attributes rather than URL escaping
Modified Paths:
--------------
bioperl-live/trunk/Bio/Graphics/FeatureFile.pm
bioperl-live/trunk/Bio/Graphics/Panel.pm
Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-10-02 21:04:09 UTC (rev 14914)
+++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-10-03 17:13:44 UTC (rev 14915)
@@ -1563,7 +1563,7 @@
sub link_pattern {
my $self = shift;
- my ($linkrule,$feature,$panel) = @_;
+ my ($linkrule,$feature,$panel,$dont_escape) = @_;
$panel ||= 'Bio::Graphics::Panel';
@@ -1574,12 +1574,14 @@
}
require CGI unless defined &CGI::escape;
+ my $escape_method = $dont_escape ? sub {shift} : \&CGI::escape;
+
my $n;
$linkrule ||= ''; # prevent uninit warning
my $seq_id = $feature->can('seq_id') ? $feature->seq_id() : $feature->location->seq_id();
$seq_id ||= $feature->seq_id; #fallback
- $linkrule =~ s/\$(\w+)/
- CGI::escape(
+ $linkrule =~ s!\$(\w+)!
+ $escape_method->(
$1 eq 'ref' ? (($n = $seq_id) && "$n") || ''
: $1 eq 'name' ? (($n = $feature->display_name) && "$n") || ''
: $1 eq 'class' ? eval {$feature->class} || ''
@@ -1596,7 +1598,7 @@
: $1 eq 'id' ? $feature->feature_id || ''
: '$'.$1
)
- /exg;
+ !exg;
return $linkrule;
}
@@ -1621,9 +1623,9 @@
for my $label ($self->feature2label($feature)) {
my $linkrule = $self->setting($label,'title');
- $linkrule ||= $self->setting(general=>'title');
+ $linkrule ||= $self->setting(general=>'title');
next unless $linkrule;
- return $self->link_pattern($linkrule,$feature);
+ return $self->link_pattern($linkrule,$feature,undef,1);
}
my $method = eval {$feature->method} || $feature->primary_tag;
Modified: bioperl-live/trunk/Bio/Graphics/Panel.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-02 21:04:09 UTC (rev 14914)
+++ bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-03 17:13:44 UTC (rev 14915)
@@ -1089,6 +1089,8 @@
my $boxes = $self->boxes;
my (%track2link,%track2title,%track2target);
+ eval "require CGI" unless CGI->can('escapeHTML');
+
my $map = qq(