From cjfields at dev.open-bio.org Wed Jul 1 00:26:30 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 1 Jul 2009 00:26:30 -0400 Subject: [Bioperl-guts-l] [15817] bioperl-live/trunk/t/data/fastq/test1_sanger.fastq: cut down on fastq example Message-ID: <200907010426.n614QUTs018682@dev.open-bio.org> Revision: 15817 Author: cjfields Date: 2009-07-01 00:26:30 -0400 (Wed, 01 Jul 2009) Log Message: ----------- cut down on fastq example Modified Paths: -------------- bioperl-live/trunk/t/data/fastq/test1_sanger.fastq Modified: bioperl-live/trunk/t/data/fastq/test1_sanger.fastq =================================================================== --- bioperl-live/trunk/t/data/fastq/test1_sanger.fastq 2009-07-01 03:17:05 UTC (rev 15816) +++ bioperl-live/trunk/t/data/fastq/test1_sanger.fastq 2009-07-01 04:26:30 UTC (rev 15817) @@ -998,1927 +998,3 @@ TATTGACAATTGTAAGACCACTAAGGATTTTTGGGCGGCAGCGACTTGGAGCTCTTGTAAAAGCGCACTGCGTTCCTTTTCTTTATTCTTTTGATCTTGAGAATCTTCTAAAAATGCCGAAAAGAAATGTTGGGAAGAGAGCGTAATCAGTTTAGAAATGCTCTTGATGGTAGCTTTATGTTGATCCATTCTTCTGCCTCCTTTACGAATAAAATAGAATAAAACTCAAATGACTAATTACCTGTATTTTACCTAATTTTGTGATAAAATTCAAGAAAATATGTTCGCCTTCAATAATTATG +SRR005406.250 FB9GE3J10F6I2T length=302 FFFFFFFFFFFIGIIFFFHHIHHHHHFBBBBBHHC>==GHHHHHHFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGG>>>CGFFBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGFFFFFFFFFFFFFFFFFFFFFFFFCCCGGFFFFFFFFFFIIIIGGGGIIIGGGIIIIIIIIIIGGGGGA::::??@AA@@@@@@@@@4444477@@@@@@@@AA at A@@@@@@:::==?????@@A - at SRR005406.251 FB9GE3J10GAX0T length=78 -TATTGACTCGTTGTTTTTGTAAAGACTAGGAACTCCTGTTGATTCATCTTTAGATTTCCAATTTGTTTTTCTTAAAAA -+SRR005406.251 FB9GE3J10GAX0T length=78 -AAAAA@???=>=2.....6)008608<=>>=73438=?=::<=>>=<<22229:000::990002,,,,,2((..... - at SRR005406.252 FB9GE3J10F6YQK length=134 -TATTGACCACATTGTATTACGAAATTCTTCCATGCCTAGTATTATCTAGTCACATGAAAACGCTATTCATTTGTTAACAGTGCTTATTCTAACGAGTATCATTATTTTTGTCAACCTTTTTCAATTATTTTTTT -+SRR005406.252 FB9GE3J10F6YQK length=134 -AAAAAAAAAAAAAAAA>>?BBA??B??5555BBEBBEFFFIAAAHAAAAAAAAAAA????AAAAA???@===41166AAAAAAAAAAAAAAAAAAAA?????><222222)==>>>8==84744443....... - at SRR005406.253 FB9GE3J10GD297 length=342 -TATTGACTTAATATTATGAAATAAAACAAGATAAAAAGTTAGTTGGTAGTCAGATAAAAAGTTAGTTGGTAGTCAGATAAAAAGTTAGTTGGTAGTCAGATAAAAAGTTGTTGGTAGTCAGATAAAAAGTTGTTGGTACCATCTGCTCTTCCTTAGAGCCACGTGGCACACAGCCCGCCTAAAAAGGTTTTAAAAATATATATAAAATATATATAAAAGAAACGCGTGCGCGCGCGGAAATTTCCCGCCCAAAATTAACCTCTTTTTTCTACAAACAGGTCCGCGATATTTACAAAACTGGCACTAAAAAATCAAAAAAGGAGTGAGCGAAAATCCCGCTCA -+SRR005406.253 FB9GE3J10GD297 length=342 -DDDDDDDDDDBBBDDDB at 666@==99B99 at BD55885DDDDDDD@@@BBBBDDDDDDDDDDBB:BBBDDBBBDDDDB8555//A@@2BBD@@@BBDDDDDD88888B@@@DDDDDDDDDDDDDDDDDDDBBBDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDBB00---;DDAABB5888DDDDDDD>><>>>>>>??>>??>>777777=2==>>>>>?????@>>??>>>>>?777777=====8......76........39:::::733337:====777 - at SRR005406.254 FB9GE3J10GESJF length=295 -TATTGACACGTTCACCACGCTTCAACTCAAAACCCTGTTTTTTCATATGCTCGGGGAATTTATCTTGTAGCCATAACAGTTCTTGACGATTAAACACATTTTTTCCTTGCAGTTTTCCATCACGCATAGGCACAACACCTAAATGCATGTGAGGGGTTTGCTCATCATTATGAACTGTTGCATAAGCAATATTTTGCTTGCCATATCGTTCGGAAAATAATTTATAACTTTCCTCAAAAATACGTTTTTGTTTCTCCTGGATCCAGTTGCTCAAAAAATACTCGGTCAGATGTTA -+SRR005406.254 FB9GE3J10GESJF length=295 -FFFFFFFFFFFFFFFFFFIIIIIIIIIIIIIIEEEHH at GGGG@IIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGG@@@FFFFGGGGGICC::CGFF?>>>GGGFFFFFFFFFFFFFFFFFGGGFFFFFFFFFFFFFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFIIIIGGBBEEEA111====0000023220....2...87<>>>>8342249>>???????BBBBBBBBBBBBBBBB??===@AAAAAA@@?AA@??88888AABB???AB@????@??43338>?>=;;;<<:83338?????@@??AA<<<>AAACAAA9999923399AAAAACAAACCC<<7744///////--9999999<22/////565----,,,,--656420----2477702-----44----- - at SRR005406.260 FB9GE3J10GCAHO length=73 -TATTGACTCGTTTGACTAGACAAATCTTTTTTTGTTTAGGAATAAATAAAAAGAAAACAAAGAAAATAGAGGG -+SRR005406.260 FB9GE3J10GCAHO length=73 -CCCCCCCCCCCCCCCCCCFAA000<>>>BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEE>>>>>EEFFFFFFFFFFF===EEEFFFFFFFFFFFFFFFFFF555555BBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF77000000>AFFFFFFFFFFFFFFFFFFFFFFFGGGGGGIIGGGGGGFFFFGFGEEFGG@@@@@@@@@=555:@@@@@@@@@@@@@@@@@:::=?@@==3331:7244@<701111== - at SRR005406.262 FB9GE3J10GBUHJ length=197 -TATTGACATCAAGTTGGCTACAACGAAATTCCTACGACGTATGAAGCGTTACCTGTAGAAGTACAAGCAGCTTTTAAAGCGCGTCCCTTTGGCGAAGTGTATCAGTACTTTTTTGCCTTGACTGCTGAACCAGTAGAACAGCCAGAGCGAGTGACACCAGTAAATTCATACAATGAAAAAAACGTTACNTATTTTAA -+SRR005406.262 FB9GE3J10GBUHJ length=197 -FFFFFFFFFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFE==;;;88ECFFFF88;===EEFFFFFFFFFFFFFFFFGGGGGDEEEFEEEFFFFFFFFFEEEFFFFFFFFFFFFFFFFFFFFFFDDDFFFFAA===AFFFFFFF7777777FF66-=!<<111155 - at SRR005406.263 FB9GE3J10F5K58 length=283 -TATTGACAAGGAAGAACGATGGAAGATCCTGATTTTCCTCGAATTGCCATTACTTATTCATTAGAAGAAAACGCTGAGAATAGTAGCGCACAGCAAGACGAGTTAGCAAAAATTATGGCCGAATATAATGCGTACTATGGTACTTCATGGACCTTACAAGACATTGAGCGCTATAACGGAGACATCAACAATCGCTTAGCTCGTAAAAAAGCAGAGTTTAAAGAATTTGGCCGTCATGTTGATTTGGTTATTGTGGTTGATCGGCTGTTAACAGGTTTTGATG -+SRR005406.263 FB9GE3J10F5K58 length=283 -FFFFFFFFFFFFFFFFFFIIIHHHGGHHHFHHHHIIHHIIHHHHHFFFFFFFFGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGFFFFFFAAAA=:GFFFF>555555?BEGEEEEGGIIIIIGGGGGGGG::;;:@@A at 7777@AAA@@@@?3333?@::1111?@:7 - at SRR005406.264 FB9GE3J10GDD5W length=279 -TATTGACCAGTTGTGCTTAATGGAGGCAGGTGTTCTTGCGATTGAATCCAATAATCATTTAATCCTTGCAAGTCTGGATTTGTAATTAATACCTCTTTCTTTTCCACATAATCTGAAAGAATCGGAATTTGCTTCTGTCTTGCTAGTTGATAAAGCTGATGATAGGTTTGTCCCGCTTCATAAAATAACGACCGCGGATCAGGTAAGTCATGCGGATATTGTTTGTCCAATACGAGGGCTACTTTCACAGAACCTGCTTGCGCCATTAGCACTTCAATA -+SRR005406.264 FB9GE3J10GDD5W length=279 -FFFFFFFFFFFFFFFFFFIIIIIIIIIHHHHGIIIIIIIIIIIIIFFFFFFGGGFFFFIGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF????FFFFFGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGIIIIIIIIGGGIIIIIIIIIIIIIIIIIAAA@@????AAAAAAA@@@@@@@A@?===@?=33 - at SRR005406.265 FB9GE3J10GFYZ8 length=177 -TATTGACGAAGCAAAGAGACACCGATTCAAAATTTAGAAATTAAAGTAAAGCATCCTAATTATCTTTCATTACGAGCTACAAAAGAAATTTATTTTTATTACAAGTTAGGAACGGATTATACAGTAACGCCAACGTCAGATGGCTCAGTTATTAAGTTCACTACGCCAATAACCAAT -+SRR005406.265 FB9GE3J10GFYZ8 length=177 -FFFFFFFFFFFFFFFFFFIIIIIIIIIIIIIIH??=HGIIHH999GB===GGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBBBFFFFFFFFFFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF - at SRR005406.266 FB9GE3J10F8NSK length=302 -TATTGACCAGATTACATTGAACCAGGACGTGATTTTCCAGGTGTGAAAGAAGCCCGTGAAATTGCCCTTGTTGACTTAGATGCAGCGGTTGCTTATGAAACGAAACATACCTTGTTACATTTGATTGAACAAGAACATAAAATTTATCCAAAAACGTTAGAGACCTACAATCAATGGGTCGTTAATCAAAAATAAAAGAAGAAAGAATTACTAGGAGGGAACTATCATAGATAGCCAGAAAATTTTAGAAATTGCTGTCAAAGCAGCAGATTCAAAACGAGCAGAAGAAATTGTCGCATTAA -+SRR005406.266 FB9GE3J10F8NSK length=302 -EEEEEEEEEEEEEEEEEEGGGGGGGGGGGGGGGGGGGGGGGIIIIEEEEEEEEEEEEEEEEEEEAAAEEDDDEEEEEEEEEEEEEEAAAAEEEEEEEEEEEEEEEEEEE====EEEEEE===EEEEEEEEEEEEEEEEEEEEEEEEF??/////5777/AAEEEEEEEEEEEEEEEDD@@ADDEEDD;;===EEEEEEEEEEEEEEEEEEEEEDDDAAADDDDDDDDDDDDDDD@????A::111.-/.....;?A?@@:::?@@@@@@@77731119;6;;//.1111.;;;9;666.... - at SRR005406.267 FB9GE3J10GFXUI length=310 -TATTGACAATACTACTACATTTAATGCTTTGTTCTTATAAAAGACAGCTATGGAATGGGTCCATGTATATATATTATGTACTCTTAAATATTAACACCGAAATTTGTATCAACCACCAATATCTATTTCAGTTCATCGACTAATCTCTTGGAACGCTATCAGACAATACTCGCCGATACTAACTCAAAAAATCGAGGAAATTTTATTCGGTTTAACACAAGCTAATTTATTTCTTCCCATTTATTTTTAGTCTGCTTATATAATATGGTTAGTTCTTTTTTATCCTTATCTAAATTTTTATTTATTAATT -+SRR005406.267 FB9GE3J10GFXUI length=310 @@ Diff output truncated at 10000 characters. @@ From bugzilla-daemon at portal.open-bio.org Wed Jul 1 06:14:43 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 1 Jul 2009 06:14:43 -0400 Subject: [Bioperl-guts-l] [Bug 2868] New: Can't parse Arabidopsis TAIR9 release in TIGR XML format Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2868 Summary: Can't parse Arabidopsis TAIR9 release in TIGR XML format Product: BioPerl Version: unspecified Platform: Macintosh OS/Version: Mac OS Status: NEW Severity: normal Priority: P2 Component: Bio::SeqIO AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: jay.moore at warwick.ac.uk I can't parse the latest Arabidopsis TIGR XML format genome assembly. format 'tigr' and 'tigrxml' both give parsing errors. Relevant source files are available here: ftp://ftp.arabidopsis.org/home/tair/Genes/TAIR9_genome_release/Tair9_XML BTW I did unzip the files first after downloading, before I tried to parse them. Comparing one of the TAIR9 XML files with the BioPerl file test.tigrxml shows that the structures of the two files are basically different on many levels. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From jason at bioperl.org Wed Jul 1 12:42:33 2009 From: jason at bioperl.org (Jason Stajich) Date: Wed, 1 Jul 2009 09:42:33 -0700 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method to Bio::Tools::SeqPattern In-Reply-To: <200906260123.n5Q1NBHr010363@portal.open-bio.org> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org> Message-ID: <55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> Soooo - how are we doing this - SVN is still the master where releases are made from - how are github repo fixes getting integrated into SVN? On Jun 25, 2009, at 6:23 PM, bugzilla-daemon at portal.open-bio.org wrote: > http://bugzilla.open-bio.org/show_bug.cgi?id=2844 > > > vecchi.b at gmail.com changed: > > What |Removed |Added > ---------------------------------------------------------------------------- > Status|NEW |RESOLVED > Resolution| |FIXED > > > > > ------- Comment #7 from vecchi.b at gmail.com 2009-06-25 21:23 EST > ------- > Patch applied in the bioperl-live git mirror > (http://github.com/rbuels/bioperl-live/commit/a70f101e720ef6cb23b214841cb02ead8a0ec565 > ). > > > -- > Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi? > tab=email > ------- You are receiving this mail because: ------- > You are the assignee for the bug, or are watching the assignee. > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l -- Jason Stajich jason at bioperl.org From cjfields at dev.open-bio.org Wed Jul 1 12:56:30 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 1 Jul 2009 12:56:30 -0400 Subject: [Bioperl-guts-l] [15818] bioperl-live/trunk/t: roundtrip tests Message-ID: <200907011656.n61GuUlW022784@dev.open-bio.org> Revision: 15818 Author: cjfields Date: 2009-07-01 12:56:29 -0400 (Wed, 01 Jul 2009) Log Message: ----------- roundtrip tests Modified Paths: -------------- bioperl-live/trunk/t/SeqIO/fastq.t bioperl-live/trunk/t/data/fastq/test2_solexa.fastq Modified: bioperl-live/trunk/t/SeqIO/fastq.t =================================================================== --- bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-01 04:26:30 UTC (rev 15817) +++ bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-01 16:56:29 UTC (rev 15818) @@ -7,9 +7,9 @@ use lib '.'; use Bio::Root::Test; - test_begin(-tests => 29); - - use_ok('Bio::SeqIO::fastq'); + test_begin(-tests => 47); + + use_ok('Bio::SeqIO::fastq'); } my $DEBUG = test_debug(); @@ -17,8 +17,8 @@ # original FASTQ (Sanger); data is from NCBI SRA database, which has # all data converted over to Sanger version of FASTQ -my $in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test1_sanger.fastq'), - -format => 'fastq'); +my $in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test1_sanger.fastq'), + -format => 'fastq'); isa_ok($in_qual, 'Bio::SeqIO'); my $qual = $in_qual->next_seq(); @@ -38,8 +38,7 @@ # this is the test example from the MAQ script , better examples welcome! $in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test2_solexa.fastq'), - -variant => 'solexa', - -format => 'fastq'); + -format => 'fastq-solexa'); $qual = $in_qual->next_seq(); isa_ok($qual, 'Bio::Seq::Quality'); @@ -48,7 +47,7 @@ is(@quals, 25, 'number of qual values'); $qualslice = join(',', at quals[12..24]); -is($qualslice, '30,30,30,30,30,30,28,30,28,30,30,24,26', 'qual slice'); +is($qualslice, '25,25,25,25,25,25,23,25,23,25,25,19,21', 'qual slice'); is($qual->display_id, 'SLXA-B3_649_FC8437_R1_1_1_610_79'); is($qual->desc, undef); @@ -56,8 +55,7 @@ # Illumina v1.3 $in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test3_illumina.fastq'), - -variant => 'illumina', - -format => 'fastq'); + -format => 'fastq-illumina'); $qual = $in_qual->next_seq(); isa_ok($qual, 'Bio::Seq::Quality'); @@ -72,9 +70,9 @@ is($qual->desc, undef); # bug 2335 + $in_qual = Bio::SeqIO->new('-file' => test_input_file('fastq','bug2335.fastq'), - '-format' => 'fastq', - -variant => 'sanger'); + '-format' => 'fastq-sanger'); $qual = $in_qual->next_seq(); isa_ok($qual, 'Bio::Seq::Quality'); @@ -92,8 +90,8 @@ # raw data $in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test3_illumina.fastq'), - -variant => 'illumina', - -format => 'fastq'); + -variant => 'illumina', + -format => 'fastq'); $qual = $in_qual->next_dataset(); @@ -105,5 +103,45 @@ is($qual->{-descriptor}, 'FC12044_91407_8_200_406_24'); is(join(',',@{$qual->{-qual}}[0..10]), '19,24,24,20,24,24,24,24,24,24,24'); -# need write_seq tests (NYI) +# some round trip tests for write_fastq +my %format = ( + 'fastq-sanger' => ['test1_sanger.fastq', 250], + 'fastq-solexa' => ['test2_solexa.fastq', 5], + 'fastq-illumina' => ['test3_illumina.fastq', 25] + ); + +while (my ($variant, $data) = each %format) { + my $outfile = "$variant.fastq"; + my ($file, $total) = @$data; + $file = test_input_file('fastq', $file); + my $in = Bio::SeqIO->new(-format => $variant, + -file => $file); + my $out = Bio::SeqIO->new(-format => $variant, + -file => ">$outfile"); + my ($input_ct, $round_trip) = (0,0); + my $test_qual; + while (my $seq = $in->next_seq) { + $input_ct++; + if ($input_ct == 5) { + $test_qual = $seq; + } + # this will likely be changed to write_seq, NYI + $out->write_fastq($seq); + } + is($input_ct, $total, $variant." total"); + $out->close; + my $new_in = Bio::SeqIO->new(-format => $variant, + -file => $outfile); + while (my $seq = $new_in->next_seq) { + $round_trip++; + if ($round_trip == 5) { + for my $att (qw(seq display_id desc)) { + is($seq->$att, $test_qual->$att, "Testing $att"); + } + is_deeply($seq->qual, $test_qual->qual, "Testing qual"); + } + } + is($round_trip, $total, $variant." total"); +} + Modified: bioperl-live/trunk/t/data/fastq/test2_solexa.fastq =================================================================== --- bioperl-live/trunk/t/data/fastq/test2_solexa.fastq 2009-07-01 04:26:30 UTC (rev 15817) +++ bioperl-live/trunk/t/data/fastq/test2_solexa.fastq 2009-07-01 16:56:29 UTC (rev 15818) @@ -1,10 +1,10 @@ @SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA -+SLXA-B3_649_FC8437_R1_1_1_610_79 ++ YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA -+SLXA-B3_649_FC8437_R1_1_1_397_389 ++ YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG From cjfields at illinois.edu Wed Jul 1 12:50:16 2009 From: cjfields at illinois.edu (Chris Fields) Date: Wed, 1 Jul 2009 11:50:16 -0500 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method to Bio::Tools::SeqPattern In-Reply-To: <55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org> <55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> Message-ID: rbuels has been notified about this and will merge the fixes in the next day or two. svn will remain the central repo. Not sure about keeping this github repo, though, unless it remains in sync (it should probably be removed). chris On Jul 1, 2009, at 11:42 AM, Jason Stajich wrote: > Soooo - how are we doing this - SVN is still the master where > releases are made from - how are github repo fixes getting > integrated into SVN? > > > On Jun 25, 2009, at 6:23 PM, bugzilla-daemon at portal.open-bio.org > wrote: > >> http://bugzilla.open-bio.org/show_bug.cgi?id=2844 >> >> >> vecchi.b at gmail.com changed: >> >> What |Removed |Added >> ---------------------------------------------------------------------------- >> Status|NEW |RESOLVED >> Resolution| |FIXED >> >> >> >> >> ------- Comment #7 from vecchi.b at gmail.com 2009-06-25 21:23 EST >> ------- >> Patch applied in the bioperl-live git mirror >> (http://github.com/rbuels/bioperl-live/commit/a70f101e720ef6cb23b214841cb02ead8a0ec565 >> ). >> >> >> -- >> Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email >> ------- You are receiving this mail because: ------- >> You are the assignee for the bug, or are watching the assignee. >> _______________________________________________ >> Bioperl-guts-l mailing list >> Bioperl-guts-l at lists.open-bio.org >> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l > > -- > Jason Stajich > jason at bioperl.org > > > > > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l From cjfields at dev.open-bio.org Wed Jul 1 13:08:38 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 1 Jul 2009 13:08:38 -0400 Subject: [Bioperl-guts-l] [15819] bioperl-live/trunk: tests for hashref to obj Message-ID: <200907011708.n61H8cYZ022860@dev.open-bio.org> Revision: 15819 Author: cjfields Date: 2009-07-01 13:08:37 -0400 (Wed, 01 Jul 2009) Log Message: ----------- tests for hashref to obj Modified Paths: -------------- bioperl-live/trunk/Bio/SeqIO/fastq.pm bioperl-live/trunk/t/SeqIO/fastq.t Modified: bioperl-live/trunk/Bio/SeqIO/fastq.pm =================================================================== --- bioperl-live/trunk/Bio/SeqIO/fastq.pm 2009-07-01 16:56:29 UTC (rev 15818) +++ bioperl-live/trunk/Bio/SeqIO/fastq.pm 2009-07-01 17:08:37 UTC (rev 15819) @@ -161,10 +161,11 @@ $data->{-id} = $id; $data->{-desc} = $fulldesc; $ct->{-seq} = 0; - } elsif ($mode eq '-seq' && $line =~ m{^\+([^\n]*)?}xmso) { + } elsif ($mode eq '-seq' && $line =~ m{^\+([^\n]*)}xmso) { + my $desc = $1; $self->throw("No description line parsed") unless $data->{-descriptor}; - if ($1 && $data->{-descriptor} ne $1) { - $self->throw("Quality descriptor [$1] doesn't match seq description ".$data->{-descriptor} ); + if ($desc && $data->{-descriptor} ne $desc) { + $self->throw("Quality descriptor [$desc] doesn't match seq description ".$data->{-descriptor} ); } $mode = '-raw_quality'; $ct->{-raw_quality} = 0; @@ -203,6 +204,7 @@ # The conversion needs to be to PHRED score, but solexa (aka illumina 1.0) # has Solexa qual units, not PHRED qual units. Convert over... # this doesn't account for very low scores yet! + # NOTE: code is kludged from MAQ (maq.sourceforge.net) @qual = map {sprintf("%.0f",(10 * log(1 + 10 ** ($_ / 10.0)) / log(10)))} @qual; } @@ -220,9 +222,11 @@ Returns : 1 for success and 0 for error Args : Bio::Seq::Quality or Bio::Seq object - =cut +# This should be creating fastq output only. Bio::SeqIO::fasta and +# Bio::SeqIO::qual should be used for that output + sub write_seq { my ($self, at seq) = @_; foreach my $seq (@seq) { Modified: bioperl-live/trunk/t/SeqIO/fastq.t =================================================================== --- bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-01 16:56:29 UTC (rev 15818) +++ bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-01 17:08:37 UTC (rev 15819) @@ -7,9 +7,10 @@ use lib '.'; use Bio::Root::Test; - test_begin(-tests => 47); + test_begin(-tests => 52); use_ok('Bio::SeqIO::fastq'); + use_ok('Bio::Seq::Quality'); } my $DEBUG = test_debug(); @@ -103,8 +104,16 @@ is($qual->{-descriptor}, 'FC12044_91407_8_200_406_24'); is(join(',',@{$qual->{-qual}}[0..10]), '19,24,24,20,24,24,24,24,24,24,24'); -# some round trip tests for write_fastq +# can this be used in a constructor? +my $qualobj = Bio::Seq::Quality->new(%$qual); +is($qualobj->seq, 'GTTAGCTCCCACCTTAAGATGTTTA'); +is($qualobj->display_id, 'FC12044_91407_8_200_406_24'); +is($qualobj->desc, undef); +is(join(',',@{$qualobj->qual}[0..10]), '19,24,24,20,24,24,24,24,24,24,24'); + +# round trip tests for write_fastq + my %format = ( 'fastq-sanger' => ['test1_sanger.fastq', 250], 'fastq-solexa' => ['test2_solexa.fastq', 5], From maj at fortinbras.us Wed Jul 1 16:07:40 2009 From: maj at fortinbras.us (Mark A. Jensen) Date: Wed, 1 Jul 2009 16:07:40 -0400 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> Message-ID: <1878A553A5E84629B9F66D15DFB662B7@NewLife> [ I would add my personal 'yikes!' to two unsynched repos.] ----- Original Message ----- From: "Chris Fields" To: "Jason Stajich" Cc: Sent: Wednesday, July 01, 2009 12:50 PM Subject: Re: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern > rbuels has been notified about this and will merge the fixes in the next day > or two. svn will remain the central repo. Not sure about keeping this > github repo, though, unless it remains in sync (it should probably be > removed). > > chris > > On Jul 1, 2009, at 11:42 AM, Jason Stajich wrote: > >> Soooo - how are we doing this - SVN is still the master where releases are >> made from - how are github repo fixes getting integrated into SVN? >> >> >> On Jun 25, 2009, at 6:23 PM, bugzilla-daemon at portal.open-bio.org wrote: >> >>> http://bugzilla.open-bio.org/show_bug.cgi?id=2844 >>> >>> >>> vecchi.b at gmail.com changed: >>> >>> What |Removed |Added >>> ---------------------------------------------------------------------------- >>> Status|NEW |RESOLVED >>> Resolution| |FIXED >>> >>> >>> >>> >>> ------- Comment #7 from vecchi.b at gmail.com 2009-06-25 21:23 EST ------- >>> Patch applied in the bioperl-live git mirror >>> (http://github.com/rbuels/bioperl-live/commit/a70f101e720ef6cb23b214841cb02ead8a0ec565 >>> ). >>> >>> >>> -- >>> Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email >>> ------- You are receiving this mail because: ------- >>> You are the assignee for the bug, or are watching the assignee. >>> _______________________________________________ >>> Bioperl-guts-l mailing list >>> Bioperl-guts-l at lists.open-bio.org >>> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l >> >> -- >> Jason Stajich >> jason at bioperl.org >> >> >> >> >> _______________________________________________ >> Bioperl-guts-l mailing list >> Bioperl-guts-l at lists.open-bio.org >> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l > > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l > > From cjfields at illinois.edu Wed Jul 1 17:01:23 2009 From: cjfields at illinois.edu (Chris Fields) Date: Wed, 1 Jul 2009 16:01:23 -0500 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: <1878A553A5E84629B9F66D15DFB662B7@NewLife> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> Message-ID: cc'ing robert: I like github, but I completely agree; attempting to keep (and maintain!) two repos in sync is not a great option. We ran into the same issues when switching from CVS to SVN a few years ago (e..g do we keep both? how do we sync everything?). It became such a logistical nightmare that we decided to dump CVS completely, and I have to say it went very smoothly. If we changed VC today I would suggest moving to git/github. I think biopython is experimenting with that. However, we just moved to svn and I think everyone is pretty satisfied with it as is. With that said: Robert, what is the intended long-term purpose of the github repo? From a maintenance perspective I can't see it being stable for long unless it is continuously synced with svn, and that makes incorporating code from forks problematic... chris On Jul 1, 2009, at 3:07 PM, Mark A. Jensen wrote: > [ I would add my personal 'yikes!' to two unsynched repos.] > ----- Original Message ----- From: "Chris Fields" > > To: "Jason Stajich" > Cc: > Sent: Wednesday, July 01, 2009 12:50 PM > Subject: Re: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" > method toBio::Tools::SeqPattern > > >> rbuels has been notified about this and will merge the fixes in >> the next day or two. svn will remain the central repo. Not sure >> about keeping this github repo, though, unless it remains in sync >> (it should probably be removed). >> >> chris >> >> On Jul 1, 2009, at 11:42 AM, Jason Stajich wrote: >> >>> Soooo - how are we doing this - SVN is still the master where >>> releases are made from - how are github repo fixes getting >>> integrated into SVN? >>> >>> >>> On Jun 25, 2009, at 6:23 PM, bugzilla-daemon at portal.open-bio.org >>> wrote: >>> >>>> http://bugzilla.open-bio.org/show_bug.cgi?id=2844 >>>> >>>> >>>> vecchi.b at gmail.com changed: >>>> >>>> What |Removed |Added >>>> ---------------------------------------------------------------------------- >>>> Status|NEW |RESOLVED >>>> Resolution| |FIXED >>>> >>>> >>>> >>>> >>>> ------- Comment #7 from vecchi.b at gmail.com 2009-06-25 21:23 EST >>>> ------- >>>> Patch applied in the bioperl-live git mirror >>>> (http://github.com/rbuels/bioperl-live/commit/a70f101e720ef6cb23b214841cb02ead8a0ec565 >>>> ). >>>> >>>> >>>> -- >>>> Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email >>>> ------- You are receiving this mail because: ------- >>>> You are the assignee for the bug, or are watching the assignee. >>>> _______________________________________________ >>>> Bioperl-guts-l mailing list >>>> Bioperl-guts-l at lists.open-bio.org >>>> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l >>> >>> -- >>> Jason Stajich >>> jason at bioperl.org >>> >>> >>> >>> >>> _______________________________________________ >>> Bioperl-guts-l mailing list >>> Bioperl-guts-l at lists.open-bio.org >>> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l >> >> _______________________________________________ >> Bioperl-guts-l mailing list >> Bioperl-guts-l at lists.open-bio.org >> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l >> > From cjfields at dev.open-bio.org Fri Jul 3 14:45:56 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 3 Jul 2009 14:45:56 -0400 Subject: [Bioperl-guts-l] [15820] bioperl-run/trunk/Bio/Tools/Run/Infernal.pm: [bug 2713] Message-ID: <200907031845.n63IjuYP002393@dev.open-bio.org> Revision: 15820 Author: cjfields Date: 2009-07-03 14:45:55 -0400 (Fri, 03 Jul 2009) Log Message: ----------- [bug 2713] * major overhaul of infernal wrapper to support v1.0 * likely contains some bugs, but cmsearch, cmbuild appear to work (others will be worked out with test suite) * tests to be added (tracking down odd bug with Bio::Root::Test) Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm Modified: bioperl-run/trunk/Bio/Tools/Run/Infernal.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-01 17:08:37 UTC (rev 15819) +++ bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-03 18:45:55 UTC (rev 15820) @@ -9,6 +9,8 @@ # March 2007 - first full implementation; needs some file IO tweaking between # runs but works for now # April 2008 - add 0.81 parameters (may be removed in the 1.0 release) +# +# June 2009 - updated for v1.0. No longer supporting pre-1.0 Infernal =head1 NAME @@ -20,28 +22,29 @@ # parameters which are switches are set with any value that evals TRUE, # others are set to a specific value - my @params = (hmmfb => 1, - thresh => 20); - my $factory = Bio::Tools::Run::Infernal->new(@params); # run cmalign|cmbuild|cmsearch|cmscore|cmemit directly as a wrapper method # this resets the program flag if previously set $factory->cmsearch(@seqs); # searches Bio::PrimarySeqI's based on set cov. model - # saves output to outfile()/tempfile + # saves output to outfile_name or STDOUT # only values which are allowed for a program are set, so one can use the same # wrapper for the following... $factory->cmalign(@seqs); # aligns Bio::PrimarySeqI's to a set cov. model - # saves output to outfile()/tempfile + # output to outfile_name $factory->cmscore(@seqs); # scores set cov. model against Bio::PrimarySeqI's, - # saves output to outfile()/tempfile/STDERR. + # output to outfile_name/STDOUT. $factory->cmbuild($aln); # builds covariance model based on alignment - # saves CM to model(), output to outfile()/tempfile/STDERR. + # CM to outfile_name (required here), output to STDOUT. $factory->cmemit($file); # emits sequence from specified cov. model; # set one if no file specified + $factory->cmcalibrate($file); # calibrates specified cov. model; + # set one if no file specified + $factory->cmstat($file); # summary stats for cov. model; + # set one if no file specified # run based on the setting of the program parameter @@ -68,14 +71,32 @@ =head1 DESCRIPTION Wrapper module for Sean Eddy's Infernal suite of programs. The current -implementation runs cmsearch, cmalign, cmemit, cmbuild, and cmscore. The only -current BioPerl object returned is for cmsearch (as shown in the SYNOPSIS); all -others are sent to either the designated outfile, a tempfile, or STDOUT. +implementation runs cmsearch, cmcalibrate, cmalign, cmemit, cmbuild, cmscore, +and cmstat. The only current BioPerl object returned is for cmsearch (as shown +in the SYNOPSIS); all others are sent to either the designated outfile, a +tempfile, or STDOUT. -Since the Infernal suite is under constant development, consider this wrapper as -highly experimental. It will only actively support the latest Infernal release -(now at v. 0.81, used to build Rfam 8.0) until a 1.0 Infernal release is made. +We HIGHLY suggest upgrading to Infernal 1.0. In that spirit, this wrapper now +supports parameters for Infernal 1.0 only; for wrapping older versions of +Infernal we suggest using the version of Bio::Tools::Run::Infernal that came +with previous versions of BioPerl-run. +NOTE: Due to conflicts in the way Infernal parameters are now formatted vs. +subroutine naming in Perl (specifically the inclusion of hyphens) and due to the +very large number of parameters available, setting and resetting parameters via +set_parameters() and reset_parameters() is required. Only parameters that are +valid for the executable set via program()/program_name() are set, the others +are silently ignored at this time. + +Also of note is some minor conflation between the use of the WrapperBase +outfile_name() method, the -o option (which designates the outfile for cmsearch +and cmalign), and the -outfile option (which is the outfile for sequences from +cmscore). All three are allowed; in particular, the -outfile parameter from +cmscore is not the actual output from the program but is for sequence output +only. If both -o and -outfile_name is set, a warning is issued and outfile_name +is set. Note that -o is only available for cmsearch and cmalign, while +outfile_name is allowed for all programs. + =head1 FEEDBACK =head2 Mailing Lists @@ -124,77 +145,222 @@ package Bio::Tools::Run::Infernal; use strict; +use warnings; +use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase Bio::ParameterBaseI); + use Bio::SeqIO; -use Bio::Root::Root; use Bio::SearchIO; use Bio::AlignIO; -use Bio::Tools::Run::WrapperBase; +use Data::Dumper; -use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); +# yes, these are the current parameters +our %INFERNAL_PARAMS = ( + 'A' => ['switch', '-', qw(cmbuild)], + 'E' => ['param', '-', qw(cmsearch cmstat)], + 'F' => ['switch', '-', qw(cmbuild)], + 'Lmax' => ['param', '--', qw(cmscore)], + 'Lmin' => ['param', '--', qw(cmscore)], + 'T' => ['param', '-', qw(cmsearch cmstat)], + 'Wbeta' => ['param', '--', qw(cmbuild)], + 'Z' => ['param', '-', qw(cmsearch cmstat)], + 'a' => ['switch', '-', qw(cmbuild cmemit cmscore)], + 'afile' => ['param', '--', qw(cmstat)], + 'ahmm' => ['param', '--', qw(cmemit)], + 'all' => ['switch', '--', qw(cmstat)], + 'aln-hbanded' => ['switch', '--', qw(cmsearch)], + 'aln-optacc' => ['switch', '--', qw(cmsearch)], + 'aln2bands' => ['switch', '--', qw(cmscore cmsearch)], + 'banddump' => ['param', '--', qw(cmalign)], + 'begin' => ['param', '--', qw(cmemit)], + 'beta' => ['param', '--', qw(cmalign cmscore cmsearch cmstat)], + 'betae' => ['param', '--', qw(cmscore)], + 'betas' => ['param', '--', qw(cmscore)], + 'bfile' => ['param', '--', qw(cmstat)], + 'binary' => ['switch', '--', qw(cmbuild)], + 'bits' => ['switch', '--', qw(cmstat)], + 'bottomonly' => ['switch', '--', qw(cmsearch)], + 'c' => ['switch', '-', qw(cmemit)], + 'call' => ['switch', '--', qw(cmbuild)], + 'cdump' => ['param', '--', qw(cmbuild)], + 'cfile' => ['param', '--', qw(cmbuild)], + 'checkfb' => ['switch', '--', qw(cmalign)], + 'checkpost' => ['switch', '--', qw(cmalign)], + 'cmL' => ['param', '--', qw(cmstat)], + 'cmaxid' => ['param', '--', qw(cmbuild)], + 'cmtbl' => ['param', '--', qw(cmbuild)], + 'corig' => ['switch', '--', qw(cmbuild)], + 'ctarget' => ['param', '--', qw(cmbuild)], + 'cyk' => ['switch', '--', qw(cmalign cmbuild cmsearch)], + 'devhelp' => ['switch', '--', qw(cmalign cmbuild cmcalibrate cmemit cmscore cmsearch)], + 'dlev' => ['param', '--', qw(cmalign)], + 'dna' => ['switch', '--', qw(cmalign cmemit cmsearch)], + 'eX' => ['param', '--', qw(cmbuild)], + 'eent' => ['switch', '--', qw(cmbuild)], + 'efile' => ['param', '--', qw(cmstat)], + 'ehmmre' => ['param', '--', qw(cmbuild)], + 'elself' => ['param', '--', qw(cmbuild)], + 'emap' => ['param', '--', qw(cmbuild)], + 'emit' => ['switch', '--', qw(cmscore)], + 'end' => ['param', '--', qw(cmemit)], + 'enone' => ['switch', '--', qw(cmbuild)], + 'ere' => ['param', '--', qw(cmbuild)], + 'exp' => ['param', '--', qw(cmemit)], + 'exp-T' => ['param', '--', qw(cmcalibrate)], + 'exp-beta' => ['param', '--', qw(cmcalibrate)], + 'exp-cmL-glc' => ['param', '--', qw(cmcalibrate)], + 'exp-cmL-loc' => ['param', '--', qw(cmcalibrate)], + 'exp-ffile' => ['param', '--', qw(cmcalibrate)], + 'exp-fract' => ['param', '--', qw(cmcalibrate)], + 'exp-gc' => ['param', '--', qw(cmcalibrate)], + 'exp-hfile' => ['param', '--', qw(cmcalibrate)], + 'exp-hmmLn-glc' => ['param', '--', qw(cmcalibrate)], + 'exp-hmmLn-loc' => ['param', '--', qw(cmcalibrate)], + 'exp-hmmLx' => ['param', '--', qw(cmcalibrate)], + 'exp-no-qdb' => ['switch', '--', qw(cmcalibrate)], + 'exp-pfile' => ['param', '--', qw(cmcalibrate)], + 'exp-qqfile' => ['param', '--', qw(cmcalibrate)], + 'exp-random' => ['switch', '--', qw(cmcalibrate)], + 'exp-sfile' => ['param', '--', qw(cmcalibrate)], + 'exp-tailn-cglc' => ['param', '--', qw(cmcalibrate)], + 'exp-tailn-cloc' => ['param', '--', qw(cmcalibrate)], + 'exp-tailn-hglc' => ['param', '--', qw(cmcalibrate)], + 'exp-tailn-hloc' => ['param', '--', qw(cmcalibrate)], + 'exp-tailp' => ['param', '--', qw(cmcalibrate)], + 'exp-tailxn' => ['param', '--', qw(cmcalibrate)], + 'fil-E-hmm' => ['param', '--', qw(cmsearch)], + 'fil-E-qdb' => ['param', '--', qw(cmsearch)], + 'fil-F' => ['param', '--', qw(cmcalibrate)], + 'fil-N' => ['param', '--', qw(cmcalibrate)], + 'fil-Smax-hmm' => ['param', '--', qw(cmsearch)], + 'fil-T-hmm' => ['param', '--', qw(cmsearch)], + 'fil-T-qdb' => ['param', '--', qw(cmsearch)], + 'fil-aln2bands' => ['switch', '--', qw(cmcalibrate)], + 'fil-beta' => ['param', '--', qw(cmsearch)], + 'fil-dfile' => ['param', '--', qw(cmcalibrate)], @@ Diff output truncated at 10000 characters. @@ From bugzilla-daemon at portal.open-bio.org Fri Jul 3 14:46:04 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 3 Jul 2009 14:46:04 -0400 Subject: [Bioperl-guts-l] [Bug 2713] [TODO] Update core Infernal parsing to v1.0, add related tests to bioperl-run In-Reply-To: Message-ID: <200907031846.n63Ik4th017499@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2713 ------- Comment #2 from cjfields at bioperl.org 2009-07-03 14:46 EST ------- Both parser and wrapper updated; tests to be added shortly. Will close out after tests are committed. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Fri Jul 3 16:30:47 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 3 Jul 2009 16:30:47 -0400 Subject: [Bioperl-guts-l] [15821] bioperl-live/trunk/Bio: catch edge cases where we unintentionally pass an undef value Message-ID: <200907032030.n63KUlab002517@dev.open-bio.org> Revision: 15821 Author: cjfields Date: 2009-07-03 16:30:47 -0400 (Fri, 03 Jul 2009) Log Message: ----------- catch edge cases where we unintentionally pass an undef value Modified Paths: -------------- bioperl-live/trunk/Bio/Root/IO.pm bioperl-live/trunk/Bio/Tools/Run/WrapperBase.pm Modified: bioperl-live/trunk/Bio/Root/IO.pm =================================================================== --- bioperl-live/trunk/Bio/Root/IO.pm 2009-07-03 18:45:55 UTC (rev 15820) +++ bioperl-live/trunk/Bio/Root/IO.pm 2009-07-03 20:30:47 UTC (rev 15821) @@ -490,9 +490,12 @@ Usage : $obj->_pushback($newvalue) Function: puts a line previously read with _readline back into a buffer. buffer can hold as many lines as system memory permits. - Example : - Returns : + Example : $obj->_pushback($newvalue) + Returns : none Args : newvalue + Note : This is only supported for pushing back data ending with the + current, localized value of $/. Using this method to push modified + data back onto the buffer stack is not supported; see bug 843. =cut @@ -626,9 +629,10 @@ sub exists_exe { my ($self, $exe) = @_; - $exe = $self if(!(ref($self) || $exe)); + $self->throw("Must pass a defined value to exists_exe") unless defined $exe; + $exe = $self if (!(ref($self) || $exe)); $exe .= '.exe' if(($^O =~ /mswin/i) && ($exe !~ /\.(exe|com|bat|cmd)$/i)); - return $exe if(-e $exe); # full path and exists + return $exe if (-e $exe); # full path and exists # Ewan's comment. I don't think we need this. People should not be # asking for a program with a pathseparator starting it Modified: bioperl-live/trunk/Bio/Tools/Run/WrapperBase.pm =================================================================== --- bioperl-live/trunk/Bio/Tools/Run/WrapperBase.pm 2009-07-03 18:45:55 UTC (rev 15820) +++ bioperl-live/trunk/Bio/Tools/Run/WrapperBase.pm 2009-07-03 20:30:47 UTC (rev 15821) @@ -346,12 +346,11 @@ =cut sub program_path { - my ($self) = @_; - my @path; - push @path, $self->program_dir if $self->program_dir; - push @path, $self->program_name.($^O =~ /mswin/i ?'.exe':''); - - return File::Spec->catfile(@path); + my ($self) = @_; + my @path; + push @path, $self->program_dir if $self->program_dir; + push @path, $self->program_name.($^O =~ /mswin/i ? '.exe' : '') if $self->program_name; + return File::Spec->catfile(@path); } =head2 program_dir From cjfields at dev.open-bio.org Fri Jul 3 16:43:41 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 3 Jul 2009 16:43:41 -0400 Subject: [Bioperl-guts-l] [15822] bioperl-run/trunk: add some infernal tests (data to follow) Message-ID: <200907032043.n63KhfY5002549@dev.open-bio.org> Revision: 15822 Author: cjfields Date: 2009-07-03 16:43:41 -0400 (Fri, 03 Jul 2009) Log Message: ----------- add some infernal tests (data to follow) Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm Added Paths: ----------- bioperl-run/trunk/t/Infernal.t Modified: bioperl-run/trunk/Bio/Tools/Run/Infernal.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-03 20:30:47 UTC (rev 15821) +++ bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-03 20:43:41 UTC (rev 15822) @@ -428,18 +428,7 @@ sub program { my $self = shift; - if (@_) { - my $p = shift; - $self->throw("Program '$p' not supported") - if !exists $INFERNAL_PROGRAM{lc $p}; - $self->{'_program'} = lc $p; - # set up cache of valid parameters - while (my ($p, $data) = each %INFERNAL_PARAMS) { - my %in_exe = map {$_ => 1} @$data[2..$#{$data}]; - $self->{valid_params}->{$p} = 1 if exists $in_exe{$self->{'_program'}}; - } - } - return $self->{'_program'}; + return $self->program_name(@_); } =head2 program_name @@ -454,7 +443,18 @@ sub program_name { my ($self) = shift; - return $self->program(@_); + if (@_) { + my $p = shift; + $self->throw("Program '$p' not supported") + if !exists $INFERNAL_PROGRAM{lc $p}; + $self->{'_program'} = lc $p; + # set up cache of valid parameters + while (my ($p, $data) = each %INFERNAL_PARAMS) { + my %in_exe = map {$_ => 1} @$data[2..$#{$data}]; + $self->{valid_params}->{$p} = 1 if exists $in_exe{$self->{'_program'}}; + } + } + return $self->{'_program'}; } =head2 model_file @@ -870,7 +870,7 @@ $self->program_name, $self->model_file, $self->outfile_name); - delete $params{o} if exists $params{o}; + delete $params{o} if exists $params{o}; # outfile... if (!defined($model) && $prog ne 'cmbuild') { @@ -911,7 +911,12 @@ } elsif ($prog eq 'cmbuild') { $self->throw('cmbuild requires one alignment file') if !defined($aligns); - push @{$args{'input'}}, ($outfile, @$aligns); + if ($model) { + push @{$args{'input'}}, ($model, @$aligns); + push @{$args{'redirect'}}, "> $outfile" if $outfile; + } else { + push @{$args{'input'}}, ($outfile, @$aligns); + } } elsif ($prog eq 'cmemit') { push @{$args{'input'}}, $model; push @{$args{'input'}}, $outfile if $outfile; @@ -924,10 +929,12 @@ # this assumes UNIX (win not supported) # this can only be implemented under some circumstances, otherwise piping # output may not work where STDOUT is used (i.e. cmsearch) - $string .= ' 2> /dev/null' if $self->quiet; + $string .= ' > /dev/null' if $self->quiet && $prog ne 'cmsearch'; $string; } +############### PRIVATE ############### + #=head2 _run # # Title : _run @@ -983,7 +990,7 @@ # outfile or tempfile-based my @args; # file output - if ($out || ($prog eq 'cmbuild' && $model) ) { + if ($out) { my $status = system($str); if($status || !-e $out || -z $out ) { my $error = ($!) ? "$! Status: $status" : "Status: $status"; @@ -1064,8 +1071,6 @@ return $inputfile; } -############### PRIVATE ############### - # this is a private sub used to regenerate the class data structures, # dumped to STDOUT Added: bioperl-run/trunk/t/Infernal.t =================================================================== --- bioperl-run/trunk/t/Infernal.t (rev 0) +++ bioperl-run/trunk/t/Infernal.t 2009-07-03 20:43:41 UTC (rev 15822) @@ -0,0 +1,246 @@ +# -*-Perl-*- +## Bioperl Test Harness Script for Modules + +use strict; +our $NUMTESTS; +our %INFERNAL_TESTS; + +BEGIN { + $NUMTESTS = 1; # base number of tests (those not in blocks) + + # I have set up eutils tests to run in sections for easier test maintenance + # and keeping track of problematic tests. The below hash is the list of + # tests, with test number and coderef. + + # these now run very simple tests for connectivity and data sampling + # main tests now with the parser + + %INFERNAL_TESTS = ( + 'params' => {'tests' => 13, + 'sub' => \&simple_param_tests}, + #'cmalign_norm' => {'tests' => 5, + # 'sub' => \&cmalign_norm}, + #'cmalign_merge' => {'tests' => 5, + # 'sub' => \&cmalign_merge}, + 'cmsearch' => {'tests' => 7, + 'sub' => \&cmsearch}, + 'cmbuild' => {'tests' => 4, + 'sub' => \&cmbuild}, + 'cmstat' => {'tests' => 2, + 'sub' => \&cmstat}, + #'cmcalibrate' => {'tests' => 5, + # 'sub' => \&cmcalibrate}, + 'cmscore' => {'tests' => 2, + 'sub' => \&cmscore}, + #'cmemit' => {'tests' => 5, + # 'sub' => \&cmemit}, + ); + $NUMTESTS += $INFERNAL_TESTS{$_}->{'tests'} for (keys %INFERNAL_TESTS); + + use Bio::Root::Test; + + test_begin(-tests => $NUMTESTS); + use_ok('Bio::Tools::Run::Infernal'); +} + +for my $test (keys %INFERNAL_TESTS) { + $INFERNAL_TESTS{$test}->{'sub'}->(); +} + +# test out parameters and command string building +sub simple_param_tests { + my %executable = ( + 'cmsearch' => { + params => { + outfile => 'foo.txt', # this won't get set (not a valid param for cmsearch) + o => 'bar.infernal', # test multiple outfile designations + dna => 1, + tabfile => 'tab.txt', # param + gcfile => 'gc.txt', + g => 'glarg', + }, + test1 => 'cmsearch --dna -g --gcfile gc.txt --tabfile tab.txt -o bar.infernal baz.cm seq1.txt', + test2 => 'cmsearch --dna -g --gcfile gc.txt --tabfile tab.txt -o bar.infernal arg.cm seq1.txt', + seq_files => ['seq1.txt'], + align_files => [], + }, + 'cmemit' => { + params => { + -n => 10, + -a => 1, + -l => 1, + -rna => 1, + -tfile => 'trees.txt', + -outfile_name => 'seqs.stk' + }, + test1 => 'cmemit -a -l --rna -n 10 --tfile trees.txt baz.cm seqs.stk', + test2 => 'cmemit -a -l --rna -n 10 --tfile trees.txt arg.cm seqs.stk', + seq_files => [], + align_files => [], + }, + 'cmscore' => { + params => { + n => 10, + a => 1, + l => 1, + rna => 1, + mxsize => 4096, + outfile => 'seqs.fna', # note this is different from outfile_name + -outfile_name => 'seqs.stk' + }, + test1 => 'cmscore -a -l --mxsize 4096 -n 10 --outfile seqs.fna baz.cm > seqs.stk', + test2 => 'cmscore -a -l --mxsize 4096 -n 10 --outfile seqs.fna arg.cm > seqs.stk', + seq_files => [], + align_files => [], + }, + 'cmalign' => { + params => { + -tau => 1e-7, + -l => 1, + -cyk => 1, + -o => 'aligns.stk' + }, + test1 => 'cmalign --cyk -l --tau 1e-07 -o aligns.stk baz.cm seq1.fas', + test2 => 'cmalign --cyk -l --tau 1e-07 -o aligns.stk arg.cm seq1.fas', + test3 => 'cmalign --cyk -l --merge --tau 1e-07 -o aligns.stk arg.cm alns1.stk alns2.stk', + seq_files => ['seq1.fas'], + align_files => ['alns1.stk', 'alns2.stk'], + }, + 'cmbuild' => { + params => { + -binary => 1, + -ehmmre => 12, + }, + test1 => 'cmbuild --binary --ehmmre 12 baz.cm alns1.stk', + test2 => 'cmbuild --binary --ehmmre 12 arg.cm alns1.stk', + seq_files => ['seq1.fas'], + align_files => ['alns1.stk'], + }, + 'cmcalibrate'=> { + params => { + -s => 12, + '-exp-no-qdb' => 1, # note use of quotes + }, + test1 => 'cmcalibrate --exp-no-qdb -s 12 baz.cm', + test2 => 'cmcalibrate --exp-no-qdb -s 12 arg.cm', + }, + ); + for my $exe (sort keys %executable) { + my %p = %{$executable{$exe}{params}}; + my $factory = Bio::Tools::Run::Infernal->new(-program => $exe, + -model_file => 'baz.cm', + %p); + #if ($exe eq 'cmbuild') { $factory->model_file('baz.cm') } + like($factory->to_exe_string(-seq_files => $executable{$exe}{seq_files}, + -align_files => $executable{$exe}{align_files}), + qr/$executable{$exe}{test1}/,"$exe parameter setting"); + $factory->model_file('arg.cm'); + like($factory->to_exe_string(-seq_files => $executable{$exe}{seq_files}, + -align_files => $executable{$exe}{align_files}), + qr/$executable{$exe}{test2}/,"$exe parameter setting"); + if ($exe eq 'cmalign') { + $factory->set_parameters(merge => 1); + like($factory->to_exe_string(-seq_files => $executable{$exe}{seq_files}, + -align_files => $executable{$exe}{align_files}), + qr/$executable{$exe}{test3}/,"$exe parameter setting"); + } + } +} + +sub cmsearch { + my ($model, $input) = (test_input_file('purine.c.cm'), test_input_file('xprt.gb')); + my $factory = Bio::Tools::Run::Infernal->new(-model_file => $model, + -program => 'cmsearch', + -verbose => test_debug()); + + SKIP: { + # this is giving me an odd error, needs debugging @@ Diff output truncated at 10000 characters. @@ From cjfields at dev.open-bio.org Fri Jul 3 16:44:34 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 3 Jul 2009 16:44:34 -0400 Subject: [Bioperl-guts-l] [15823] bioperl-run/trunk/t/data: add some infernal data Message-ID: <200907032044.n63KiYWA002580@dev.open-bio.org> Revision: 15823 Author: cjfields Date: 2009-07-03 16:44:34 -0400 (Fri, 03 Jul 2009) Log Message: ----------- add some infernal data Added Paths: ----------- bioperl-run/trunk/t/data/purine.1.sto bioperl-run/trunk/t/data/purine.c.cm bioperl-run/trunk/t/data/xprt.gb Added: bioperl-run/trunk/t/data/purine.1.sto =================================================================== --- bioperl-run/trunk/t/data/purine.1.sto (rev 0) +++ bioperl-run/trunk/t/data/purine.1.sto 2009-07-03 20:44:34 UTC (rev 15823) @@ -0,0 +1,10 @@ +# STOCKHOLM 1.0 +#=GF ID Purine +#=GF AU Boese B, Barrick JE, Breaker RR + +Purine1 AAAAUUGAAUAUCGUUUUACUUGUUUAUGUCGUGAAUUGGCACGACGUUU +#=GC SS_cons .................<<<<<<<<...<<<<<<......>>>>>>.... + +Purine1 CUACAAGGUGCCGGAACACCUAACAAUAAGUAAGUCAGCAGUGAGAU +#=GC SS_cons ....<<<<<<......>>>>>>..>>>>>>>>............... +// Added: bioperl-run/trunk/t/data/purine.c.cm =================================================================== --- bioperl-run/trunk/t/data/purine.c.cm (rev 0) +++ bioperl-run/trunk/t/data/purine.c.cm 2009-07-03 20:44:34 UTC (rev 15823) @@ -0,0 +1,421 @@ +INFERNAL-1 [1.0] +NAME Purine +STATES 300 +NODES 83 +ALPHABET 1 +ELSELF -0.08926734 +WBETA 1e-07 +NSEQ 1 +EFFNSEQ 0.947 +CLEN 97 +BCOM cmbuild -F purine.1.c.cm purine.1.sto +BDATE Tue Jun 10 14:02:19 2008 +CCOM cmcalibrate --mpi -s 33 purine.1.c.cm +CDATE Tue Jun 10 14:04:59 2008 +NULL 0.000 0.000 0.000 0.000 +PART 1 0 100 +E-LC 0 0.64833 -7.14264 2.12133 1500000 456645 0.002464 +E-GC 0 0.46448 -14.38873 -5.02654 1500000 29013 0.012925 +E-LI 0 0.65194 -4.96886 3.85984 1500000 355523 0.003164 +E-GI 0 0.49174 -9.59692 -0.75492 1500000 28995 0.012933 +E-LV 0 0.59065 -2.13140 5.57363 15000000 106563 0.010557 +E-GV 0 0.43778 -7.76006 5.03247 15000000 101436 0.003697 +E-LF 0 0.66077 2.12933 8.99900 15000000 105326 0.010681 +E-GF 0 0.51245 -2.36642 8.56322 15000000 101500 0.003695 +FT-LC 34 0.99500 10000 1500000 0 + 0.00452996 0.00389903 0.00341422 0.00288634 0.00265286 0.00187979 0.000984222 0.000936242 0.0007468 0.000598133 0.000529425 0.000385634 0.000285602 0.000236949 0.000152016 0.000141459 0.000123592 7.17917e-05 5.25729e-05 4.22648e-05 3.17222e-05 3.0333e-05 2.67051e-05 2.35364e-05 2.01571e-05 1.36351e-05 1.1297e-05 9.49059e-06 9.23133e-06 1.14947e-08 4.15804e-09 4.0082e-09 2.4551e-09 1.49663e-09 + 5212.25 4579.09 3706.37 3297.28 2830.51 2471.93 2078.98 1778.8 1598.22 1403.15 1178.54 1003.72 829.781 733.816 642.125 569.365 502.853 446.759 388.875 347.556 259.185 231.188 207.856 186.386 162.559 146.25 108.274 96.3867 94.0454 77.1452 38.1921 23.2212 13.7596 9.40454 +FT-LI 36 0.99500 10000 1500000 0 + 0.00850336 0.00741054 0.00431807 0.00363433 0.00307697 0.00182873 0.00124959 0.00107591 0.000839932 0.000519964 0.000482718 0.000353331 0.000301383 0.00023473 0.000163517 0.000143345 0.000138557 0.000106854 7.70022e-05 6.39427e-05 4.78061e-05 3.98837e-05 3.73716e-05 3.3184e-05 3.11406e-05 3.07918e-05 2.42366e-05 1.61341e-05 1.29786e-05 9.02737e-06 8.21719e-06 1.19501e-08 1.08007e-08 1.04462e-08 6.93206e-09 2.01991e-09 + 5212.25 4579.09 3706.37 3297.28 2830.51 2531.44 2226.87 1847.07 1648.64 1403.15 1208.51 1003.72 829.781 733.816 642.125 569.365 502.853 446.759 388.875 347.556 290.19 259.185 231.188 207.856 186.386 162.559 146.25 124.308 108.274 96.3867 94.0454 75.4313 38.1921 29.8295 23.2212 9.40454 +FT-GC 36 0.99500 10000 1500000 0 + 24.1331 9.58982 5.9462 2.00331 1.20472 0.426064 0.215394 0.123414 0.074245 0.054071 0.0492988 0.0410377 0.0224058 0.0160269 0.0122088 0.00592477 0.00450385 0.00296819 0.00250509 0.00228036 0.0020282 0.00140472 0.00105539 0.000873973 0.000585355 0.000497859 0.000437115 0.000303842 0.000278283 0.000201632 0.000155905 0.000124376 7.78412e-05 7.46192e-05 6.76104e-08 5.18833e-08 + 5427.93 4831.86 4124.25 3650.71 3203.51 2762.54 2371.3 2101.18 1860.88 1632.93 1461.83 1310.67 1084.85 947.093 838.779 720.728 612.036 510.235 438.648 394.703 352.983 315.349 260.616 232.83 209.183 187.264 167.299 149.769 134.35 120.581 104.732 92.0909 80.4795 73.8869 8.97747 7.38869 +FT-GI 36 0.99500 10000 1500000 0 + 18.4719 6.8295 3.27761 1.18195 0.769228 0.332874 0.161281 0.112362 0.0751909 0.0472083 0.0377739 0.0324258 0.022067 0.0163589 0.0109462 0.00556627 0.00493464 0.0039373 0.00348885 0.00301673 0.00262653 0.0021461 0.00142516 0.00103985 0.000632597 0.000497663 0.000423556 0.000259907 0.000226887 0.00020694 0.000190256 0.000107139 8.872e-05 8.44254e-05 1.4384e-07 1.0721e-07 + 5427.93 4831.86 4124.25 3650.71 3203.51 2762.54 2371.3 2101.18 1860.88 1632.93 1461.83 1310.67 1084.85 947.093 838.779 720.728 612.036 549.592 477.841 416.95 368.698 315.349 260.616 232.83 209.183 187.264 167.299 149.769 134.35 120.891 104.732 92.0909 80.4795 73.8869 8.97747 7.38869 +MODEL: + [ ROOT 0 ] + S 0 -1 0 1 4 -6.476 -6.683 -0.074 -5.097 + IL 1 1 2 1 4 -1.686 -2.369 -1.117 -4.855 0.000 0.000 0.000 0.000 + IR 2 2 3 2 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 1 ] + ML 3 2 3 5 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 4 2 3 5 3 -6.174 -1.687 -0.566 + IL 5 5 3 5 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 2 ] + ML 6 5 3 8 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 7 5 3 8 3 -6.174 -1.687 -0.566 + IL 8 8 3 8 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 3 ] + ML 9 8 3 11 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 10 8 3 11 3 -6.174 -1.687 -0.566 + IL 11 11 3 11 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 4 ] + ML 12 11 3 14 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 13 11 3 14 3 -6.174 -1.687 -0.566 + IL 14 14 3 14 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 5 ] + ML 15 14 3 17 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 16 14 3 17 3 -6.174 -1.687 -0.566 + IL 17 17 3 17 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 6 ] + ML 18 17 3 20 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 19 17 3 20 3 -6.174 -1.687 -0.566 + IL 20 20 3 20 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 7 ] + ML 21 20 3 23 3 -7.532 -0.028 -6.186 -0.482 -1.493 1.257 -0.893 + D 22 20 3 23 3 -6.174 -1.687 -0.566 + IL 23 23 3 23 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 8 ] + ML 24 23 3 26 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 25 23 3 26 3 -6.174 -1.687 -0.566 + IL 26 26 3 26 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 9 ] + ML 27 26 3 29 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 28 26 3 29 3 -6.174 -1.687 -0.566 + IL 29 29 3 29 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 10 ] + ML 30 29 3 32 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 31 29 3 32 3 -6.174 -1.687 -0.566 + IL 32 32 3 32 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 11 ] + ML 33 32 3 35 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 34 32 3 35 3 -6.174 -1.687 -0.566 + IL 35 35 3 35 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 12 ] + ML 36 35 3 38 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 37 35 3 38 3 -6.174 -1.687 -0.566 + IL 38 38 3 38 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 13 ] + ML 39 38 3 41 3 -7.532 -0.028 -6.186 -0.337 1.020 -1.177 -0.439 + D 40 38 3 41 3 -6.174 -1.687 -0.566 + IL 41 41 3 41 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 14 ] + ML 42 41 3 44 3 -7.532 -0.028 -6.186 -0.482 -1.493 1.257 -0.893 @@ Diff output truncated at 10000 characters. @@ From cjfields at dev.open-bio.org Sat Jul 4 00:15:05 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 4 Jul 2009 00:15:05 -0400 Subject: [Bioperl-guts-l] [15824] bioperl-run/trunk: more commits and tests; cmcalibrate works but commenting tests ( takes very long unless one enables --mpi) Message-ID: <200907040415.n644F5OI002989@dev.open-bio.org> Revision: 15824 Author: cjfields Date: 2009-07-04 00:15:05 -0400 (Sat, 04 Jul 2009) Log Message: ----------- more commits and tests; cmcalibrate works but commenting tests (takes very long unless one enables --mpi) Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm bioperl-run/trunk/t/Infernal.t Modified: bioperl-run/trunk/Bio/Tools/Run/Infernal.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-03 20:44:34 UTC (rev 15823) +++ bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-04 04:15:05 UTC (rev 15824) @@ -376,41 +376,26 @@ my $self = $class->SUPER::new(@args); # these are specific parameters we do not want passed on to set_parameters - my ($program, $model, $tf, $validate, $q, $o1, $o2) = + my ($program, $model, $validate, $q, $o1, $o2) = $self->_rearrange([qw(PROGRAM MODEL_FILE - TEMPFILE VALIDATE_PARAMETERS QUIET OUTFILE_NAME O)], @args); - $q && $self->quiet($q); if ($o1 && $o2) { $self->warn("Only assign to either -outfile_name or -o, not both;"); } my $out = $o1 || $o2; - if ($tf && $out) { - $self->throw("Can't set both -outfile_name/-o and -tempfile"); - } - - $self->io->_initialize_io(); - $self->validate_parameters($validate); - $program && $self->program($program); - - if ($tf && !$out) { - my ($tfh, $outfile) = $self->io->tempfile(-dir=> $self->io->tempdir()); - close($tfh); - undef $tfh; - $self->outfile_name($outfile); - } else { - $out ||= ''; - $self->outfile_name($out); - } - $self->set_parameters(@args); - + $q && $self->quiet($q); + $program && $self->program($program); $model && $self->model_file($model); + $out ||= ''; + $self->outfile_name($out); + $self->io->_initialize_io(); + $self->set_parameters(@args); return $self; } @@ -577,13 +562,17 @@ return $self->_run(-seq_files => [$infile1]); } elsif ( $seq[0]->isa("Bio::Align::AlignI") ) { if (scalar(@seq) != 2) { - + $self->throw("") } - my $infile1 = $self->_writeSeqFile($seq[0]); - return $self->_run(-seq_files => [$infile1]); + my $infile1 = $self->_writeAlignFile($seq[0]); + my $infile2 = $self->_writeAlignFile($seq[1]); + return $self->_run(-align_files => [$infile1, $infile2]); } } else { # we can maybe add a check for the file extension and try to DTRT + my %params = $self->get_parameters('valid'); + $params{merge} ? return $self->_run(-align_files => \@seq): + return $self->_run(-seq_files => \@seq); return $self->_run(-seq_files => \@seq); } } @@ -872,7 +861,6 @@ $self->outfile_name); delete $params{o} if exists $params{o}; - # outfile... if (!defined($model) && $prog ne 'cmbuild') { $self->throw("model_file() not defined") } @@ -918,18 +906,27 @@ push @{$args{'input'}}, ($outfile, @$aligns); } } elsif ($prog eq 'cmemit') { - push @{$args{'input'}}, $model; - push @{$args{'input'}}, $outfile if $outfile; + if (!$outfile) { + $self->throw('cmemit requires an outfile_name; tempfile support not implemented yet'); + } else { + push @{$args{'input'}}, ($model, ,$outfile); + } } + # quiet! + if ($self->quiet && $prog ne 'cmsearch') { + if ($prog eq 'cmalign') { + push @{$args{switch}}, '-q' if !exists $params{q}; + } else { + push @{$args{redirect}}, '> /dev/null'; + } + } + my $string = "$exe ".join(' ',(@{$args{switch}}, @{$args{param}}, @{$args{input}}, @{$args{redirect}})); - # this assumes UNIX (win not supported) - # this can only be implemented under some circumstances, otherwise piping - # output may not work where STDOUT is used (i.e. cmsearch) - $string .= ' > /dev/null' if $self->quiet && $prog ne 'cmsearch'; + $string; } @@ -971,23 +968,16 @@ my $str = $self->to_exe_string(@_); $self->debug("Infernal command: $str\n"); - # retrieve available params here to determine whether we are doing the right thing below + my %has = $self->get_parameters('valid'); - # cmsearch always returns SearchIO - # cmbuild does not return anything (outfile = STDOUT, cmfile is written to model() ) - # cmscore does not return anything (outfile = STDOUT or outfile) - # cmemit - AlignIO or SeqIO (based on parameter settings) - # cmalign - AlignIO - my $obj = ($prog eq 'cmsearch') ? Bio::SearchIO->new(-format => 'infernal', -version => $version, -model => $model) : ($prog eq 'cmalign' ) ? Bio::AlignIO->new(-format => 'stockholm') : - ($prog eq 'cmemit') ? Bio::AlignIO->new(-format => 'stockholm') : + ($prog eq 'cmemit' && $has{a}) ? Bio::AlignIO->new(-format => 'stockholm') : ($prog eq 'cmemit') ? Bio::SeqIO->new(-format => 'fasta') : undef; - # outfile or tempfile-based my @args; # file output if ($out) { Modified: bioperl-run/trunk/t/Infernal.t =================================================================== --- bioperl-run/trunk/t/Infernal.t 2009-07-03 20:44:34 UTC (rev 15823) +++ bioperl-run/trunk/t/Infernal.t 2009-07-04 04:15:05 UTC (rev 15824) @@ -6,7 +6,7 @@ our %INFERNAL_TESTS; BEGIN { - $NUMTESTS = 1; # base number of tests (those not in blocks) + $NUMTESTS = 3; # base number of tests (those not in blocks) # I have set up eutils tests to run in sections for easier test maintenance # and keeping track of problematic tests. The below hash is the list of @@ -18,22 +18,22 @@ %INFERNAL_TESTS = ( 'params' => {'tests' => 13, 'sub' => \&simple_param_tests}, - #'cmalign_norm' => {'tests' => 5, - # 'sub' => \&cmalign_norm}, - #'cmalign_merge' => {'tests' => 5, - # 'sub' => \&cmalign_merge}, + 'cmalign' => {'tests' => 6, + 'sub' => \&cmalign_norm}, + # need to add merge tests 'cmsearch' => {'tests' => 7, 'sub' => \&cmsearch}, 'cmbuild' => {'tests' => 4, 'sub' => \&cmbuild}, 'cmstat' => {'tests' => 2, 'sub' => \&cmstat}, - #'cmcalibrate' => {'tests' => 5, + # leave this one commented (may run for quite a while dep. on CPU) + #'cmcalibrate' => {'tests' => 2, # 'sub' => \&cmcalibrate}, 'cmscore' => {'tests' => 2, 'sub' => \&cmscore}, - #'cmemit' => {'tests' => 5, - # 'sub' => \&cmemit}, + 'cmemit' => {'tests' => 6, + 'sub' => \&cmemit}, ); $NUMTESTS += $INFERNAL_TESTS{$_}->{'tests'} for (keys %INFERNAL_TESTS); @@ -41,6 +41,8 @@ test_begin(-tests => $NUMTESTS); use_ok('Bio::Tools::Run::Infernal'); + use_ok('Bio::SeqIO'); + use_ok('Bio::AlignIO'); } for my $test (keys %INFERNAL_TESTS) { @@ -244,3 +246,77 @@ } } +sub cmalign_norm { + my ($cm, $seqfile) = (test_input_file('purine.c.cm'), + test_input_file('purine.added.fa')); + my $factory = Bio::Tools::Run::Infernal->new(-model_file => $cm, + -program => 'cmalign'); + SKIP: { + test_skip(-requires_executable => $factory, + -tests => 6); + + my @seqs; + + my $stream = $factory->cmalign($seqfile); + isa_ok($stream, 'Bio::AlignIO', 'cmalign works'); + my $aln = $stream->next_aln; + isa_ok($aln, 'Bio::Align::AlignI'); + is($aln->num_sequences, 2); + + my $seqio = Bio::SeqIO->new(-format => 'fasta', -file => $seqfile); + + while (my $seq = $seqio->next_seq) { + push @seqs, $seq; + } + + $stream = $factory->cmalign(@seqs); + isa_ok($stream, 'Bio::AlignIO', 'cmalign works'); + $aln = $stream->next_aln; + isa_ok($aln, 'Bio::Align::AlignI'); + is($aln->num_sequences, 2); + } +} + +sub cmemit { + my ($cm, $outfile) = (test_input_file('purine.c.cm'), test_output_file()); + my $factory = Bio::Tools::Run::Infernal->new(-model_file => $cm, + -program => 'cmemit', + -outfile_name => $outfile, + -quiet => 1 + ); + SKIP: { + test_skip(-requires_executable => $factory, + -tests => 6); + + # seqs (default) + my $stream = $factory->cmemit(); + isa_ok($stream, 'Bio::SeqIO', 'cmemit works'); + my $seq = $stream->next_seq; + isa_ok($seq, 'Bio::PrimarySeqI'); + is($seq->display_id, 'Purine-1'); + + # alignment (-a flag) + $factory->set_parameters(-a => 1); + $stream = $factory->cmemit(); + isa_ok($stream, 'Bio::AlignIO', 'cmemit works'); + my $aln = $stream->next_aln; + isa_ok($aln, 'Bio::Align::AlignI'); + is($aln->num_sequences, 10); + } +} + +sub cmcalibrate { + my $cm = test_input_file('purine.cm'); + my $factory = Bio::Tools::Run::Infernal->new(-model_file => $cm, + -program => 'cmcalibrate'); + SKIP: { + test_skip(-requires_executable => $factory, + -tests => 2); + + # seqs (default) + my $success = $factory->cmcalibrate(); + ok($success, 'cmcalibrate successful'); + cmp_ok(-M $cm, '<=' ,1); + } +} + From cjfields at dev.open-bio.org Sat Jul 4 00:16:13 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 4 Jul 2009 00:16:13 -0400 Subject: [Bioperl-guts-l] [15825] bioperl-live/trunk/Bio/AlignIO/stockholm.pm: small stockholm fix to bypass infernal cruft for default output Message-ID: <200907040416.n644GDgu003020@dev.open-bio.org> Revision: 15825 Author: cjfields Date: 2009-07-04 00:16:13 -0400 (Sat, 04 Jul 2009) Log Message: ----------- small stockholm fix to bypass infernal cruft for default output Modified Paths: -------------- bioperl-live/trunk/Bio/AlignIO/stockholm.pm Modified: bioperl-live/trunk/Bio/AlignIO/stockholm.pm =================================================================== --- bioperl-live/trunk/Bio/AlignIO/stockholm.pm 2009-07-04 04:15:05 UTC (rev 15824) +++ bioperl-live/trunk/Bio/AlignIO/stockholm.pm 2009-07-04 04:16:13 UTC (rev 15825) @@ -382,8 +382,8 @@ my $handler = $self->alignhandler; # advance to alignment header - while( defined(my $line = $self->_readline) ) { - if ($line =~ m{^#\s*STOCKHOLM\s+}xmso) { + while( defined(my $line = $self->_readline) ) { + if ($line =~ m{^\#\s*STOCKHOLM\s+}xmso) { last; } } From cjfields at dev.open-bio.org Sat Jul 4 00:17:58 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 4 Jul 2009 00:17:58 -0400 Subject: [Bioperl-guts-l] [15826] bioperl-run/trunk/t/data/purine.cm: for cmcalibrate tests Message-ID: <200907040417.n644HwBE003051@dev.open-bio.org> Revision: 15826 Author: cjfields Date: 2009-07-04 00:17:57 -0400 (Sat, 04 Jul 2009) Log Message: ----------- for cmcalibrate tests Added Paths: ----------- bioperl-run/trunk/t/data/purine.cm Added: bioperl-run/trunk/t/data/purine.cm =================================================================== --- bioperl-run/trunk/t/data/purine.cm (rev 0) +++ bioperl-run/trunk/t/data/purine.cm 2009-07-04 04:17:57 UTC (rev 15826) @@ -0,0 +1,421 @@ +INFERNAL-1 [1.0] +NAME Purine +STATES 300 +NODES 83 +ALPHABET 1 +ELSELF -0.08926734 +WBETA 1e-07 +NSEQ 1 +EFFNSEQ 0.947 +CLEN 97 +BCOM cmbuild -F purine.1.c.cm purine.1.sto +BDATE Tue Jun 10 14:02:19 2008 +CCOM cmcalibrate --mpi -s 33 purine.1.c.cm +CDATE Tue Jun 10 14:04:59 2008 +NULL 0.000 0.000 0.000 0.000 +PART 1 0 100 +E-LC 0 0.64833 -7.14264 2.12133 1500000 456645 0.002464 +E-GC 0 0.46448 -14.38873 -5.02654 1500000 29013 0.012925 +E-LI 0 0.65194 -4.96886 3.85984 1500000 355523 0.003164 +E-GI 0 0.49174 -9.59692 -0.75492 1500000 28995 0.012933 +E-LV 0 0.59065 -2.13140 5.57363 15000000 106563 0.010557 +E-GV 0 0.43778 -7.76006 5.03247 15000000 101436 0.003697 +E-LF 0 0.66077 2.12933 8.99900 15000000 105326 0.010681 +E-GF 0 0.51245 -2.36642 8.56322 15000000 101500 0.003695 +FT-LC 34 0.99500 10000 1500000 0 + 0.00452996 0.00389903 0.00341422 0.00288634 0.00265286 0.00187979 0.000984222 0.000936242 0.0007468 0.000598133 0.000529425 0.000385634 0.000285602 0.000236949 0.000152016 0.000141459 0.000123592 7.17917e-05 5.25729e-05 4.22648e-05 3.17222e-05 3.0333e-05 2.67051e-05 2.35364e-05 2.01571e-05 1.36351e-05 1.1297e-05 9.49059e-06 9.23133e-06 1.14947e-08 4.15804e-09 4.0082e-09 2.4551e-09 1.49663e-09 + 5212.25 4579.09 3706.37 3297.28 2830.51 2471.93 2078.98 1778.8 1598.22 1403.15 1178.54 1003.72 829.781 733.816 642.125 569.365 502.853 446.759 388.875 347.556 259.185 231.188 207.856 186.386 162.559 146.25 108.274 96.3867 94.0454 77.1452 38.1921 23.2212 13.7596 9.40454 +FT-LI 36 0.99500 10000 1500000 0 + 0.00850336 0.00741054 0.00431807 0.00363433 0.00307697 0.00182873 0.00124959 0.00107591 0.000839932 0.000519964 0.000482718 0.000353331 0.000301383 0.00023473 0.000163517 0.000143345 0.000138557 0.000106854 7.70022e-05 6.39427e-05 4.78061e-05 3.98837e-05 3.73716e-05 3.3184e-05 3.11406e-05 3.07918e-05 2.42366e-05 1.61341e-05 1.29786e-05 9.02737e-06 8.21719e-06 1.19501e-08 1.08007e-08 1.04462e-08 6.93206e-09 2.01991e-09 + 5212.25 4579.09 3706.37 3297.28 2830.51 2531.44 2226.87 1847.07 1648.64 1403.15 1208.51 1003.72 829.781 733.816 642.125 569.365 502.853 446.759 388.875 347.556 290.19 259.185 231.188 207.856 186.386 162.559 146.25 124.308 108.274 96.3867 94.0454 75.4313 38.1921 29.8295 23.2212 9.40454 +FT-GC 36 0.99500 10000 1500000 0 + 24.1331 9.58982 5.9462 2.00331 1.20472 0.426064 0.215394 0.123414 0.074245 0.054071 0.0492988 0.0410377 0.0224058 0.0160269 0.0122088 0.00592477 0.00450385 0.00296819 0.00250509 0.00228036 0.0020282 0.00140472 0.00105539 0.000873973 0.000585355 0.000497859 0.000437115 0.000303842 0.000278283 0.000201632 0.000155905 0.000124376 7.78412e-05 7.46192e-05 6.76104e-08 5.18833e-08 + 5427.93 4831.86 4124.25 3650.71 3203.51 2762.54 2371.3 2101.18 1860.88 1632.93 1461.83 1310.67 1084.85 947.093 838.779 720.728 612.036 510.235 438.648 394.703 352.983 315.349 260.616 232.83 209.183 187.264 167.299 149.769 134.35 120.581 104.732 92.0909 80.4795 73.8869 8.97747 7.38869 +FT-GI 36 0.99500 10000 1500000 0 + 18.4719 6.8295 3.27761 1.18195 0.769228 0.332874 0.161281 0.112362 0.0751909 0.0472083 0.0377739 0.0324258 0.022067 0.0163589 0.0109462 0.00556627 0.00493464 0.0039373 0.00348885 0.00301673 0.00262653 0.0021461 0.00142516 0.00103985 0.000632597 0.000497663 0.000423556 0.000259907 0.000226887 0.00020694 0.000190256 0.000107139 8.872e-05 8.44254e-05 1.4384e-07 1.0721e-07 + 5427.93 4831.86 4124.25 3650.71 3203.51 2762.54 2371.3 2101.18 1860.88 1632.93 1461.83 1310.67 1084.85 947.093 838.779 720.728 612.036 549.592 477.841 416.95 368.698 315.349 260.616 232.83 209.183 187.264 167.299 149.769 134.35 120.891 104.732 92.0909 80.4795 73.8869 8.97747 7.38869 +MODEL: + [ ROOT 0 ] + S 0 -1 0 1 4 -6.476 -6.683 -0.074 -5.097 + IL 1 1 2 1 4 -1.686 -2.369 -1.117 -4.855 0.000 0.000 0.000 0.000 + IR 2 2 3 2 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 1 ] + ML 3 2 3 5 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 4 2 3 5 3 -6.174 -1.687 -0.566 + IL 5 5 3 5 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 2 ] + ML 6 5 3 8 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 7 5 3 8 3 -6.174 -1.687 -0.566 + IL 8 8 3 8 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 3 ] + ML 9 8 3 11 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 10 8 3 11 3 -6.174 -1.687 -0.566 + IL 11 11 3 11 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 4 ] + ML 12 11 3 14 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 13 11 3 14 3 -6.174 -1.687 -0.566 + IL 14 14 3 14 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 5 ] + ML 15 14 3 17 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 16 14 3 17 3 -6.174 -1.687 -0.566 + IL 17 17 3 17 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 6 ] + ML 18 17 3 20 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 19 17 3 20 3 -6.174 -1.687 -0.566 + IL 20 20 3 20 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 7 ] + ML 21 20 3 23 3 -7.532 -0.028 -6.186 -0.482 -1.493 1.257 -0.893 + D 22 20 3 23 3 -6.174 -1.687 -0.566 + IL 23 23 3 23 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 8 ] + ML 24 23 3 26 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 25 23 3 26 3 -6.174 -1.687 -0.566 + IL 26 26 3 26 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 9 ] + ML 27 26 3 29 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 28 26 3 29 3 -6.174 -1.687 -0.566 + IL 29 29 3 29 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 10 ] + ML 30 29 3 32 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 31 29 3 32 3 -6.174 -1.687 -0.566 + IL 32 32 3 32 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 11 ] + ML 33 32 3 35 3 -7.532 -0.028 -6.186 1.498 -1.614 -1.435 -1.062 + D 34 32 3 35 3 -6.174 -1.687 -0.566 + IL 35 35 3 35 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 12 ] + ML 36 35 3 38 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 37 35 3 38 3 -6.174 -1.687 -0.566 + IL 38 38 3 38 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 13 ] + ML 39 38 3 41 3 -7.532 -0.028 -6.186 -0.337 1.020 -1.177 -0.439 + D 40 38 3 41 3 -6.174 -1.687 -0.566 + IL 41 41 3 41 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 14 ] + ML 42 41 3 44 3 -7.532 -0.028 -6.186 -0.482 -1.493 1.257 -0.893 + D 43 41 3 44 3 -6.174 -1.687 -0.566 + IL 44 44 3 44 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 15 ] + ML 45 44 3 47 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 + D 46 44 3 47 3 -6.174 -1.687 -0.566 + IL 47 47 3 47 3 -1.442 -0.798 -4.142 0.000 0.000 0.000 0.000 + [ MATL 16 ] + ML 48 47 3 50 3 -7.532 -0.028 -6.186 -0.327 -0.985 -1.116 1.161 @@ Diff output truncated at 10000 characters. @@ From cjfields at dev.open-bio.org Sat Jul 4 00:31:34 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 4 Jul 2009 00:31:34 -0400 Subject: [Bioperl-guts-l] [15827] bioperl-run/trunk/Bio/Tools/Run/Infernal.pm: doc fixes; cmscore may need some tweaking at some point... Message-ID: <200907040431.n644VYZ8003082@dev.open-bio.org> Revision: 15827 Author: cjfields Date: 2009-07-04 00:31:34 -0400 (Sat, 04 Jul 2009) Log Message: ----------- doc fixes; cmscore may need some tweaking at some point... Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm Modified: bioperl-run/trunk/Bio/Tools/Run/Infernal.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-04 04:17:57 UTC (rev 15826) +++ bioperl-run/trunk/Bio/Tools/Run/Infernal.pm 2009-07-04 04:31:34 UTC (rev 15827) @@ -10,7 +10,7 @@ # runs but works for now # April 2008 - add 0.81 parameters (may be removed in the 1.0 release) # -# June 2009 - updated for v1.0. No longer supporting pre-1.0 Infernal +# July 2009 - updated for v1.0. No longer supporting pre-1.0 Infernal =head1 NAME @@ -28,23 +28,29 @@ # this resets the program flag if previously set $factory->cmsearch(@seqs); # searches Bio::PrimarySeqI's based on set cov. model - # saves output to outfile_name or STDOUT + # saves output to optional outfile_name, returns + # Bio::SearchIO # only values which are allowed for a program are set, so one can use the same # wrapper for the following... - $factory->cmalign(@seqs); # aligns Bio::PrimarySeqI's to a set cov. model - # output to outfile_name - $factory->cmscore(@seqs); # scores set cov. model against Bio::PrimarySeqI's, + $factory->cmalign(@seqs); # aligns Bio::PrimarySeqI's to a set cov. model, + # --merge option allows two alignments generated + # from the same CM to be merged. + # output to outfile_name, returns Bio::AlignIO + $factory->cmscore(); # scores set cov. model against Bio::PrimarySeqI, # output to outfile_name/STDOUT. $factory->cmbuild($aln); # builds covariance model based on alignment - # CM to outfile_name (required here), output to STDOUT. - $factory->cmemit($file); # emits sequence from specified cov. model; - # set one if no file specified - $factory->cmcalibrate($file); # calibrates specified cov. model; - # set one if no file specified - $factory->cmstat($file); # summary stats for cov. model; - # set one if no file specified + # CM to outfile_name or model_file (one is required + # here), output to STDOUT. + $factory->cmemit(); # emits sequence from specified cov. model; + # set one if no file specified. output to + # outfile_name, returns Bio::SeqIO or (if -a is set) + # Bio::AlignIO + $factory->cmcalibrate($file); # calibrates specified cov. model; output to + # STDOUT + $factory->cmstat($file); # summary stats for cov. model; set one if no file + # specified; output to STDOUT # run based on the setting of the program parameter @@ -72,9 +78,9 @@ Wrapper module for Sean Eddy's Infernal suite of programs. The current implementation runs cmsearch, cmcalibrate, cmalign, cmemit, cmbuild, cmscore, -and cmstat. The only current BioPerl object returned is for cmsearch (as shown -in the SYNOPSIS); all others are sent to either the designated outfile, a -tempfile, or STDOUT. +and cmstat. cmsearch will return a Bio::SearchIO, cmemit a Bio::SeqIO/AlignIO, +and cmalign a Bio::AlignIO. All others send output to STDOUT. Optionally, +any program's output can be redirected to outfile_name. We HIGHLY suggest upgrading to Infernal 1.0. In that spirit, this wrapper now supports parameters for Infernal 1.0 only; for wrapping older versions of @@ -84,19 +90,10 @@ NOTE: Due to conflicts in the way Infernal parameters are now formatted vs. subroutine naming in Perl (specifically the inclusion of hyphens) and due to the very large number of parameters available, setting and resetting parameters via -set_parameters() and reset_parameters() is required. Only parameters that are -valid for the executable set via program()/program_name() are set, the others -are silently ignored at this time. +set_parameters() and reset_parameters() is required. All valid parameters can +be set, but only ones valid for the executable set via program()/program_name() +are used for calling the executables, the others are silently ignored. -Also of note is some minor conflation between the use of the WrapperBase -outfile_name() method, the -o option (which designates the outfile for cmsearch -and cmalign), and the -outfile option (which is the outfile for sequences from -cmscore). All three are allowed; in particular, the -outfile parameter from -cmscore is not the actual output from the program but is for sequence output -only. If both -o and -outfile_name is set, a warning is issued and outfile_name -is set. Note that -o is only available for cmsearch and cmalign, while -outfile_name is allowed for all programs. - =head1 FEEDBACK =head2 Mailing Lists From bugzilla-daemon at portal.open-bio.org Sat Jul 4 00:33:19 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 4 Jul 2009 00:33:19 -0400 Subject: [Bioperl-guts-l] [Bug 2713] [TODO] Update core Infernal parsing to v1.0, add related tests to bioperl-run In-Reply-To: Message-ID: <200907040433.n644XJlD004661@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2713 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #3 from cjfields at bioperl.org 2009-07-04 00:33 EST ------- First draft wrapper for Infernal 1.0 and parser now in svn (core, run) along with tests and any relevant fixes. Closing... -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 5 03:09:25 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 5 Jul 2009 03:09:25 -0400 Subject: [Bioperl-guts-l] [Bug 2869] New: Bio::Tree, fail to get children of root node through some way Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2869 Summary: Bio::Tree, fail to get children of root node through some way Product: BioPerl Version: unspecified Platform: PC OS/Version: All Status: NEW Severity: major Priority: P2 Component: Core Components AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: jiandingzhe at msn.com I found if: my $IN=Bio::TreeIO->new(-file=>'some_file'); my $root=$IN->next_tree->get_root_node; foreach my $child ($root->each_Descendant) { #do something here } It never give you child. An bug demonstration script will be attached soonly. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 5 03:10:29 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 5 Jul 2009 03:10:29 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907050710.n657ATRQ027063@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 ------- Comment #1 from jiandingzhe at msn.com 2009-07-05 03:10 EST ------- Created an attachment (id=1336) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1336&action=view) it firstly read the tree successfully, then use the method I give which cannot get any child -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 5 03:11:17 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 5 Jul 2009 03:11:17 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907050711.n657BHBl027107@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 ------- Comment #2 from jiandingzhe at msn.com 2009-07-05 03:11 EST ------- Created an attachment (id=1337) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1337&action=view) A tree file for read -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 5 03:12:15 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 5 Jul 2009 03:12:15 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907050712.n657CFsp027170@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 ------- Comment #3 from jiandingzhe at msn.com 2009-07-05 03:12 EST ------- You may run the debug script by ./readtree.pl outtree -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 5 10:43:04 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 5 Jul 2009 10:43:04 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907051443.n65Eh4Nw029119@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Severity|major |minor Target Milestone|--- |1.6.2 point release ------- Comment #4 from cjfields at bioperl.org 2009-07-05 10:43 EST ------- Strange bug, and it should be looked into, but you are calling the methods in an odd way: my $IN = Bio::TreeIO->new(-file=>$file_tree); my $root = $IN->next_tree->get_root_node; Your first version works in the test script, as well as this: my $in = Bio::TreeIO->new(-file=>$file_tree); my $tree= $in->next_tree; my $root=$tree->get_root_node; So, maybe something in how the iterator is creating the generated object. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 5 11:07:22 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 5 Jul 2009 11:07:22 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907051507.n65F7M7w029931@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 ------- Comment #5 from cjfields at bioperl.org 2009-07-05 11:07 EST ------- This is happening b/c, in your code example, the returned Tree is garbage-collected (and with it all node references to one another). Since you have assigned the root node to a variable, it is retained but everything else disappears. Thus you only end up with the root node and nothing else. I can leave this open for now, but it isn't a major bug if you call in the right context. We should (at the least) add a note about this to Bio::Tree::Tree if it isn't already present. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From rmb32 at cornell.edu Sun Jul 5 12:39:59 2009 From: rmb32 at cornell.edu (Robert Buels) Date: Sun, 05 Jul 2009 09:39:59 -0700 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: <1878A553A5E84629B9F66D15DFB662B7@NewLife> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> Message-ID: <4A50D75F.70209@cornell.edu> Mark A. Jensen wrote: > [ I would add my personal 'yikes!' to two unsynched repos.] No need for any yiking. This is no different from any other way of making a branch. The only difference is the mechanisms that are used for propagating and merging changes. In the case of an svn branch, it's the execrable svn merge (in both directions), and in the case of this git branch, it's the only slightly less execrable git-svn. Rob -- Robert Buels Bioinformatics Analyst, Sol Genomics Network Boyce Thompson Institute for Plant Research Tower Rd Ithaca, NY 14853 Tel: 503-889-8539 rmb32 at cornell.edu http://www.sgn.cornell.edu From rmb32 at cornell.edu Sun Jul 5 12:36:51 2009 From: rmb32 at cornell.edu (Robert Buels) Date: Sun, 05 Jul 2009 09:36:51 -0700 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> Message-ID: <4A50D6A3.90301@cornell.edu> Chris Fields wrote: > Robert, what is the intended long-term purpose of the github repo? From It's essentially just a feature branch that we made for the yapc hackathon. I was planning on merging it back into svn trunk using git-svn. There's still one more issue to tie up with it before I want to merge it (which will appear as a string of svn commits). I disagree that it's impossible to keep in sync. Moose, does this, for example, although they use two git repos instead of a git and an svn. But anyway, I wasn't planning to do that. Although it certainly would be nice to use git instead of svn...sigh. Rob -- Robert Buels Bioinformatics Analyst, Sol Genomics Network Boyce Thompson Institute for Plant Research Tower Rd Ithaca, NY 14853 Tel: 503-889-8539 rmb32 at cornell.edu http://www.sgn.cornell.edu From cjfields at illinois.edu Sun Jul 5 17:05:28 2009 From: cjfields at illinois.edu (Chris Fields) Date: Sun, 5 Jul 2009 16:05:28 -0500 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: <4A50D6A3.90301@cornell.edu> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> <4A50D6A3.90301@cornell.edu> Message-ID: <291DE67D-9F9D-4D3A-B627-A8B75C514CA3@illinois.edu> On Jul 5, 2009, at 11:36 AM, Robert Buels wrote: > Chris Fields wrote: >> Robert, what is the intended long-term purpose of the github repo? >> From > > It's essentially just a feature branch that we made for the yapc > hackathon. I was planning on merging it back into svn trunk using > git-svn. There's still one more issue to tie up with it before I > want to merge it (which will appear as a string of svn commits). Okay. I still need to co the github repo (July 4th got in the way) but I will do that today and tomorrow. From what I have seen I don't think the fixes will be an issue, though. > I disagree that it's impossible to keep in sync. Moose, does this, > for example, although they use two git repos instead of a git and an > svn. But anyway, I wasn't planning to do that. I don't think it's impossible, just more to worry about and harder to maintain. And the less we have to worry about the better. > Although it certainly would be nice to use git instead of svn...sigh. > > Rob Ah, yes I agree, but we would have to convince the other devs that it would be worth it, and I don't think we'll be able to do that after recently switching to svn ;> chris From rbuels at dev.open-bio.org Sun Jul 5 17:35:51 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Sun, 5 Jul 2009 17:35:51 -0400 Subject: [Bioperl-guts-l] [15828] bioperl-live/branches/yapc10hackathon/: made branch for yapc10hackathon bugfixes Message-ID: <200907052135.n65LZpkd014067@dev.open-bio.org> Revision: 15828 Author: rbuels Date: 2009-07-05 17:35:50 -0400 (Sun, 05 Jul 2009) Log Message: ----------- made branch for yapc10hackathon bugfixes Added Paths: ----------- bioperl-live/branches/yapc10hackathon/ Copied: bioperl-live/branches/yapc10hackathon (from rev 15827, bioperl-live/trunk) From rbuels at dev.open-bio.org Sun Jul 5 17:45:52 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Sun, 5 Jul 2009 17:45:52 -0400 Subject: [Bioperl-guts-l] [15829] bioperl-live/branches/yapc10hackathon: Merging in yapc2009 hackathon commits. Message-ID: <200907052145.n65Ljq7M014161@dev.open-bio.org> Revision: 15829 Author: rbuels Date: 2009-07-05 17:45:51 -0400 (Sun, 05 Jul 2009) Log Message: ----------- Merging in yapc2009 hackathon commits. Fixes for bugs 2346, 2537, 2844, 2847, and 2850. Commit messages (copied from git) below. commit 50b7ca41d9ad4ba4c99200877976a73982ed2f9c Author: Bruno Vecchi Date: Fri Jun 26 00:08:09 2009 -0300 fixed bug 2850 commit debe4cf89c2ab21c426753f5040cfd5bd53b67fb Author: Jay Hannah Date: Thu Jun 25 21:49:42 2009 -0500 [2346] Demonstrating the parse failure. Yup, looks like it's still FAIL. commit 558aa72f8843cc8ddb9c71d824c6302d8d4fb2d0 Author: Bruno Vecchi Date: Thu Jun 25 22:53:12 2009 -0300 fix for bug 2847 commit 60aae8aff24c2d9b83b48fd12737030be6f0cba5 Author: Jay Hannah Date: Thu Jun 25 20:20:20 2009 -0500 [2842] Added mask_columns(). Made podchecker happier. commit 59e909b9572c95df473a78b355eda087b33b7441 Author: Bruno Vecchi Date: Thu Jun 25 21:51:30 2009 -0300 applied patch 2844 -- add backtranslate method to Bio::Tools::SeqPattern with tests commit 6129e7e40bf1aa33a4f953a3d29f9ea7598a56c9 Author: Jay Hannah Date: Thu Jun 25 19:28:33 2009 -0500 [2537] Huh. Someone already fixed this at some point. commit 501519409b07b9dac119bdb436bfdf5819309ebd Author: Robert Buels Date: Thu Jun 25 17:20:56 2009 -0700 added findbin and test::more stuff to 2537 regression test commit b526871af10d95a53e8d67a9921af5f60e5bad6d Author: Jay Hannah Date: Thu Jun 25 19:16:31 2009 -0500 Test for this bug. commit aeedb413de7c0a5382f27dd0689e0cfc474cc56a Author: Robert Buels Date: Thu Jun 25 17:07:59 2009 -0700 added test dir for bugzilla #2537 regression Modified Paths: -------------- bioperl-live/branches/yapc10hackathon/Bio/SearchIO/exonerate.pm bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm bioperl-live/branches/yapc10hackathon/Bio/Seq/Quality.pm bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm bioperl-live/branches/yapc10hackathon/Bio/Tools/SeqPattern.pm bioperl-live/branches/yapc10hackathon/t/Align/SimpleAlign.t bioperl-live/branches/yapc10hackathon/t/SearchIO/exonerate.t bioperl-live/branches/yapc10hackathon/t/SeqTools/SeqPattern.t Added Paths: ----------- bioperl-live/branches/yapc10hackathon/Bio/Tools/SeqPattern/ bioperl-live/branches/yapc10hackathon/Bio/Tools/SeqPattern/Backtranslate.pm bioperl-live/branches/yapc10hackathon/t/SeqTools/Backtranslate.t bioperl-live/branches/yapc10hackathon/t/bugzilla/ bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/ bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/exonerate.output.dontwork bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/exonerate.output.works bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/in.t bioperl-live/branches/yapc10hackathon/t/bugzilla/2537/ bioperl-live/branches/yapc10hackathon/t/bugzilla/2537/in.fasta bioperl-live/branches/yapc10hackathon/t/bugzilla/2537/in.t bioperl-live/branches/yapc10hackathon/t/bugzilla/2842/ bioperl-live/branches/yapc10hackathon/t/bugzilla/2842/mask.t bioperl-live/branches/yapc10hackathon/t/bugzilla/2844/ bioperl-live/branches/yapc10hackathon/t/bugzilla/2844/note.txt bioperl-live/branches/yapc10hackathon/t/bugzilla/2847/ bioperl-live/branches/yapc10hackathon/t/bugzilla/2847/bug.t bioperl-live/branches/yapc10hackathon/t/bugzilla/2847/test_clear_range.fastq bioperl-live/branches/yapc10hackathon/t/bugzilla/2850/ bioperl-live/branches/yapc10hackathon/t/bugzilla/2850/bug.t bioperl-live/branches/yapc10hackathon/t/bugzilla/2850/headerless.psl bioperl-live/branches/yapc10hackathon/t/bugzilla/README Modified: bioperl-live/branches/yapc10hackathon/Bio/SearchIO/exonerate.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/SearchIO/exonerate.pm 2009-07-05 21:35:50 UTC (rev 15828) +++ bioperl-live/branches/yapc10hackathon/Bio/SearchIO/exonerate.pm 2009-07-05 21:45:51 UTC (rev 15829) @@ -250,7 +250,7 @@ $self->{'_seencigar'} = 0; $self->{'_vulgar'} = 0; } elsif( s/^vulgar:\s+(\S+)\s+ # query sequence id - (\d+)\s+(\d+)\s+([\-\+])\s+ # query start-end-strand + (\d+)\s+(\d+)\s+([\-\+\.])\s+ # query start-end-strand (\S+)\s+ # target sequence id (\d+)\s+(\d+)\s+([\-\+])\s+ # target start-end-strand (\d+)\s+ # score Modified: bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm 2009-07-05 21:35:50 UTC (rev 15828) +++ bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm 2009-07-05 21:45:51 UTC (rev 15829) @@ -184,7 +184,7 @@ #clear header if exists if(/^psLayout/){ #pass over header lines lines - while(!/^\d+\s+\d+\s+/) { + while((!/^\d+\s+\d+\s+/) && defined $_) { $_ = $self->_readline; } } @@ -196,7 +196,8 @@ $block_sizes, $q_starts, $t_starts ) = split; - my $score = sprintf "%.2f", ( 100 * ( $matches + $mismatches + $rep_matches ) / $q_length ); + my $score = eval { sprintf "%.2f", ( 100 * ( $matches + $mismatches + $rep_matches ) / $q_length ) }; + next if $@; # this is overall percent identity... my $percent_id = sprintf "%.2f", ( 100 * ($matches + $rep_matches)/( $matches + $mismatches + $rep_matches ) Modified: bioperl-live/branches/yapc10hackathon/Bio/Seq/Quality.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/Seq/Quality.pm 2009-07-05 21:35:50 UTC (rev 15828) +++ bioperl-live/branches/yapc10hackathon/Bio/Seq/Quality.pm 2009-07-05 21:45:51 UTC (rev 15829) @@ -596,6 +596,9 @@ # populate the cache if needed $self->_find_clear_ranges unless defined $self->{_ranges}; + # fix for bug 2847 + return unless defined $self->{_ranges}; + # pick the longest for (sort {$b->{length} <=> $a->{length} } @{$self->{_ranges}} ){ Modified: bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm 2009-07-05 21:35:50 UTC (rev 15828) +++ bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm 2009-07-05 21:45:51 UTC (rev 15829) @@ -97,11 +97,11 @@ http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support - + Please direct usage questions or support issues to the mailing list: - + L - + rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem @@ -3180,4 +3180,57 @@ $self->num_sequences(@_); } +=head2 mask_columns + + Title : mask_columns + Usage : $aln2 = $aln->mask_columns(20,30) + Function : Masks a slice of the alignment inclusive of start and + end columns, and the first column in the alignment is denoted 1. + Mask beyond the length of the sequence does not do padding. + Returns : A Bio::SimpleAlign object + Args : Positive integer for start column, positive integer for end column, + optional string value use for the mask. Example: + + $aln2 = $aln->mask_columns(20,30,'?') + +=cut + +sub mask_columns { + #based on slice(), but did not include the Bio::Seq::Meta sections as I was not sure what it is doing + my $self = shift; + + my ($start, $end, $mask_char) = @_; + unless (defined $mask_char) { $mask_char = 'N' } + + $self->throw("Mask start has to be a positive integer, not [$start]") + unless $start =~ /^\d+$/ and $start > 0; + $self->throw("Mask end has to be a positive integer, not [$end]") + unless $end =~ /^\d+$/ and $end > 0; + $self->throw("Mask start [$start] has to be smaller than or equal to end [$end]") + unless $start <= $end; + $self->throw("This alignment has only ". $self->length . " residues. Mask start " . + "[$start] is too big.") if $start > $self->length; + $self->throw("Mask character $mask_char has to be a single character") + unless length($mask_char) == 1; + + my $aln = $self->new; + $aln->id($self->id); + foreach my $seq ( $self->each_seq() ) { + my $new_seq = Bio::LocatableSeq->new(-id => $seq->id, + -alphabet => $seq->alphabet, + -strand => $seq->strand, + -verbose => $self->verbose); + + my $mask_end = $end; + $mask_end = $seq->length if( $end > $seq->length ); + my $masked_string = $mask_char x ($mask_end - $start +1); + my $new_dna_string = substr($seq->seq,0,$start-1) . $masked_string . substr($seq->seq,$mask_end); + $new_seq->seq($new_dna_string); + $aln->add_seq($new_seq); + } + return $aln; +} + + + 1; Added: bioperl-live/branches/yapc10hackathon/Bio/Tools/SeqPattern/Backtranslate.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/Tools/SeqPattern/Backtranslate.pm (rev 0) +++ bioperl-live/branches/yapc10hackathon/Bio/Tools/SeqPattern/Backtranslate.pm 2009-07-05 21:45:51 UTC (rev 15829) @@ -0,0 +1,481 @@ +package Bio::Tools::SeqPattern::Backtranslate; +use strict; +use warnings; + +use base qw(Bio::Root::Root); +use base qw(Exporter); + +=head1 NAME + +Bio::Tools::SeqPattern::Backtranslate + +=head1 DESCRIPTION + +This module should not be used directly. It provides helper methods to +Bio::Tools::SeqPattern to reverse translate protein patterns. + +=cut + +use Bio::Seq; +use Bio::Tools::CodonTable; + +use List::MoreUtils qw(uniq); +use Carp qw(croak); + +our @EXPORT_OK = qw(_reverse_translate_motif); + +our @EXPORT = @EXPORT_OK; + +sub _reverse_translate_motif { + # Main subroutine. It takes a Profam-like motif and returns its + # reverse translation using degenerate codons. + + # Steps: + # 1. Tokenize, then parse tokens. + # 2. Reverse translate each token type. + # 3. Join tokens in original order. Return the resulting string. + + my $motif = shift; + + $motif =~ s/\./X/g; + $motif = uc $motif; + + ### 1. Tokenize, parse the motif. + my ( $ordered, $classified ) = _parse_motif($motif); + + ### 2. Reverse translate each token type. + # Reverse translate the plain (unambiguous) tokens. + my $ct = Bio::Tools::CodonTable->new; + foreach my $seq ( @{ $classified->{plain} } ) { + my $seqO + = Bio::Seq->new( -seq => $$seq, -alphabet => 'protein' ); + $$seq = $ct->reverse_translate_all($seqO); + } + + # Reverse translate the ambiguous tokens. + foreach my $token ( @{ $classified->{ambiguous} } ) { + my ($aas) = $$token =~ m(([A-Za-z\.]+)); + my @codons_to_contract; + + foreach my $residue ( split '', $aas ) { + push @codons_to_contract, $ct->revtranslate($residue); + } + + my $ambiguous_codon = _contract_codons(@codons_to_contract); + $$token = $ambiguous_codon; + } + + # Reverse translate the negated residues. + foreach my $token ( @{ $classified->{negated} } ) { + my ($aas) = $$token =~ m(([A-Za-z\.]+)); + my $ambiguous_codon = _negated_aas_to_codon($aas); + $$token = $ambiguous_codon; + } + + ### 3. Join the profile back from its tokens. + return join '', map {$$_} @{$ordered}; + +} + +sub _parse_motif { + # Profam-like motif parser. It takes the pattern as a string, and + # returns two data structures that contain the tokens, organized + # by order of appearance in the pattern (first return value) and by + # category (second return value). + + my $motif = shift; + my $parser = _tokenize_motif($motif); + my ( %tokens, @tokens ); + + while ( my $token = $parser->() ) { + croak ("Unknown syntax token: <", $token->[1], ">") + if ( $token->[0] eq 'UNKNOWN' ); + push @{ $tokens{ $token->[0] } }, \$token->[1]; + push @tokens, \$token->[1]; + } + return ( \@tokens, \%tokens ); +} + +sub _tokenize_motif { + + # Return a tokenizer iterator that sequentially recognizes and + # returns each token in the input pattern. + # Examples of each token type: + + # ambiguous: a position with more than one possible residue. + # eg. [ALEP] + # negated: a position in which some residues are excluded. + # eg. [^WY] + # plain: a common sequence of residues. One position, one residue. + # eg. MAAEIK + # open_par, close_par: tags surrounding a motif that is repeated + # a certain number of times. + # eg. (...){3} + + my $target = shift; + return sub { + return [ 'ambiguous', $1 ] + if $target =~ /\G (\[[A-Za-z\.]+\]) /gcx; + return [ 'negated', $1 ] + if $target =~ /\G (\[\^[A-Za-z\.]+\]) /gcx; + return [ 'plain', $1 ] + if $target =~ /\G ([A-Za-z\.]+) /gcx; + return [ 'open_par', $1 ] + if $target =~ /\G (\() /gcx; + return [ 'close_par', $1 ] + if $target =~ /\G (\)[\{\d+[,\d+]*\}]*) /gcx; + return [ 'UNKNOWN', $1 ] + if $target =~ /\G (.) /gcx; + return; + }; +} + +sub _contract_codons { + + # Take a list of codons, return an ambiguous codon. + my @codons = map { uc $_ } @_; + + my @by_letter = ( [], [], [], ); + my $ambiguous_codon; + foreach my $codon (@codons) { + my @letters = split '', $codon; + for my $i ( 0 .. 2 ) { + push @{ $by_letter[$i] }, $letters[$i]; + } + } + for my $i ( 0 .. 2 ) { + $ambiguous_codon + .= _convert( 'dna', _uniq_string( @{ $by_letter[$i] } ) ); + } + return $ambiguous_codon; +} + @@ Diff output truncated at 10000 characters. @@ From rmb32 at cornell.edu Sun Jul 5 17:48:24 2009 From: rmb32 at cornell.edu (Robert Buels) Date: Sun, 05 Jul 2009 14:48:24 -0700 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: <291DE67D-9F9D-4D3A-B627-A8B75C514CA3@illinois.edu> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> <4A50D6A3.90301@cornell.edu> <291DE67D-9F9D-4D3A-B627-A8B75C514CA3@illinois.edu> Message-ID: <4A511FA8.5010101@cornell.edu> Chris Fields wrote: > Okay. I still need to co the github repo (July 4th got in the way) but > I will do that today and tomorrow. From what I have seen I don't think > the fixes will be an issue, though. I went ahead and put all the stuff in an svn branch called yapc10hackathon, and deleted the github repo. Have a look there. Rob From bugzilla-daemon at portal.open-bio.org Sun Jul 5 17:49:58 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 5 Jul 2009 17:49:58 -0400 Subject: [Bioperl-guts-l] [Bug 2346] exonerate parser in bioperl-live fails when protein2dna comparison is performed In-Reply-To: Message-ID: <200907052149.n65Lnwua011912@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2346 rmb32 at cornell.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|ASSIGNED |RESOLVED Resolution| |FIXED ------- Comment #4 from rmb32 at cornell.edu 2009-07-05 17:49 EST ------- Parse completes, at least, with the addition of the patch below. After studying it for a while, I'm not completely sure, but I suspect that with this one addition that it actually works correctly with this use case of protein query sequences, because the exonerate.pm code appears to be correctly using the differing VULGAR query and subject lengths. But does the CIGAR also work? Might be a good idea to look at this further. But this particular bug is now closed, I think. --- a/Bio/SearchIO/exonerate.pm +++ b/Bio/SearchIO/exonerate.pm @@ -250,7 +250,7 @@ sub next_result{ $self->{'_seencigar'} = 0; $self->{'_vulgar'} = 0; } elsif( s/^vulgar:\s+(\S+)\s+ # query sequence id - (\d+)\s+(\d+)\s+([\-\+])\s+ # query start-end-strand + (\d+)\s+(\d+)\s+([\-\+\.])\s+ # query start-end-strand (\S+)\s+ # target sequence id (\d+)\s+(\d+)\s+([\-\+])\s+ # target start-end-strand (\d+)\s+ # score -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at illinois.edu Sun Jul 5 18:56:15 2009 From: cjfields at illinois.edu (Chris Fields) Date: Sun, 5 Jul 2009 17:56:15 -0500 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: <4A511FA8.5010101@cornell.edu> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> <4A50D6A3.90301@cornell.edu> <291DE67D-9F9D-4D3A-B627-A8B75C514CA3@illinois.edu> <4A511FA8.5010101@cornell.edu> Message-ID: <5BD710CF-DFA5-46A5-8A2A-81C13FB34A23@illinois.edu> I'll give it a look-over. There was one fix in the initial lookover that works but is a bit off: + while((!/^\d+\s+\d+\s+/) && defined $_) { should be + while(defined $_ && (!/^\d+\s+\d+\s+/)) { correct? Otherwise the defined check is run after the regex against $_. chris On Jul 5, 2009, at 4:48 PM, Robert Buels wrote: > Chris Fields wrote: >> Okay. I still need to co the github repo (July 4th got in the way) >> but I will do that today and tomorrow. From what I have seen I >> don't think the fixes will be an issue, though. > > I went ahead and put all the stuff in an svn branch called > yapc10hackathon, and deleted the github repo. Have a look there. > > Rob > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l From rmb32 at cornell.edu Mon Jul 6 00:40:01 2009 From: rmb32 at cornell.edu (Robert Buels) Date: Sun, 05 Jul 2009 21:40:01 -0700 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: <5BD710CF-DFA5-46A5-8A2A-81C13FB34A23@illinois.edu> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> <4A50D6A3.90301@cornell.edu> <291DE67D-9F9D-4D3A-B627-A8B75C514CA3@illinois.edu> <4A511FA8.5010101@cornell.edu> <5BD710CF-DFA5-46A5-8A2A-81C13FB34A23@illinois.edu> Message-ID: <4A518021.2040804@cornell.edu> Yep, that's probably better. Actually, now that I'm looking at psl.pm, it needs some work. I'm going to poke at it a bit. Also, one of the t/bugzilla/ tests written during the hackathon for some fastq-related thing is failing with the recent changes to trunk. Could you switch to this branch and take a look? Do prove -w -I. -rv t/bugzilla/2847/bug.t to see it. Rob Chris Fields wrote: > I'll give it a look-over. There was one fix in the initial lookover > that works but is a bit off: > > + while((!/^\d+\s+\d+\s+/) && defined $_) { > > should be > > + while(defined $_ && (!/^\d+\s+\d+\s+/)) { > > correct? Otherwise the defined check is run after the regex against $_. > > chris > > On Jul 5, 2009, at 4:48 PM, Robert Buels wrote: > >> Chris Fields wrote: >>> Okay. I still need to co the github repo (July 4th got in the way) >>> but I will do that today and tomorrow. From what I have seen I don't >>> think the fixes will be an issue, though. >> >> I went ahead and put all the stuff in an svn branch called >> yapc10hackathon, and deleted the github repo. Have a look there. >> >> Rob >> _______________________________________________ >> Bioperl-guts-l mailing list >> Bioperl-guts-l at lists.open-bio.org >> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l > > -- Robert Buels Bioinformatics Analyst, Sol Genomics Network Boyce Thompson Institute for Plant Research Tower Rd Ithaca, NY 14853 Tel: 503-889-8539 rmb32 at cornell.edu http://www.sgn.cornell.edu From rbuels at dev.open-bio.org Mon Jul 6 00:49:06 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Mon, 6 Jul 2009 00:49:06 -0400 Subject: [Bioperl-guts-l] [15830] bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm: squashed ambiguous call warning Message-ID: <200907060449.n664n65u015236@dev.open-bio.org> Revision: 15830 Author: rbuels Date: 2009-07-06 00:49:06 -0400 (Mon, 06 Jul 2009) Log Message: ----------- squashed ambiguous call warning Modified Paths: -------------- bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm Modified: bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm 2009-07-05 21:45:51 UTC (rev 15829) +++ bioperl-live/branches/yapc10hackathon/Bio/SimpleAlign.pm 2009-07-06 04:49:06 UTC (rev 15830) @@ -3198,10 +3198,10 @@ sub mask_columns { #based on slice(), but did not include the Bio::Seq::Meta sections as I was not sure what it is doing my $self = shift; - + my ($start, $end, $mask_char) = @_; unless (defined $mask_char) { $mask_char = 'N' } - + $self->throw("Mask start has to be a positive integer, not [$start]") unless $start =~ /^\d+$/ and $start > 0; $self->throw("Mask end has to be a positive integer, not [$end]") @@ -3211,8 +3211,8 @@ $self->throw("This alignment has only ". $self->length . " residues. Mask start " . "[$start] is too big.") if $start > $self->length; $self->throw("Mask character $mask_char has to be a single character") - unless length($mask_char) == 1; - + unless CORE::length($mask_char) == 1; + my $aln = $self->new; $aln->id($self->id); foreach my $seq ( $self->each_seq() ) { @@ -3220,7 +3220,7 @@ -alphabet => $seq->alphabet, -strand => $seq->strand, -verbose => $self->verbose); - + my $mask_end = $end; $mask_end = $seq->length if( $end > $seq->length ); my $masked_string = $mask_char x ($mask_end - $start +1); From rbuels at dev.open-bio.org Mon Jul 6 01:07:48 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Mon, 6 Jul 2009 01:07:48 -0400 Subject: [Bioperl-guts-l] [15831] bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm: perltidy on SearchIO/psl.pm Message-ID: <200907060507.n6657mUC015312@dev.open-bio.org> Revision: 15831 Author: rbuels Date: 2009-07-06 01:07:48 -0400 (Mon, 06 Jul 2009) Log Message: ----------- perltidy on SearchIO/psl.pm Modified Paths: -------------- bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm Modified: bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm 2009-07-06 04:49:06 UTC (rev 15830) +++ bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm 2009-07-06 05:07:48 UTC (rev 15831) @@ -2,7 +2,7 @@ # # BioPerl module for Bio::SearchIO::psl # -# Please direct questions and support issues to +# Please direct questions and support issues to # # Cared for by Jason Stajich # @@ -75,10 +75,8 @@ =cut - # Let the code begin... - package Bio::SearchIO::psl; use vars qw(%MAPPING %MODEMAP $DEFAULT_WRITER_CLASS $DefaultProgramName); @@ -87,55 +85,55 @@ use Bio::Search::Hit::HitFactory; use Bio::Search::Result::ResultFactory; -$DefaultProgramName = 'BLAT'; +$DefaultProgramName = 'BLAT'; $DEFAULT_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter'; # mapping of terms to Bioperl hash keys %MODEMAP = ( - 'PSLOutput' => 'result', - 'Result' => 'result', - 'Hit' => 'hit', - 'Hsp' => 'hsp' - ); + 'PSLOutput' => 'result', + 'Result' => 'result', + 'Hit' => 'hit', + 'Hsp' => 'hsp' +); -%MAPPING = ( - 'Hsp_bit-score' => 'HSP-bits', - 'Hsp_score' => 'HSP-score', - 'Hsp_evalue' => 'HSP-evalue', - 'Hsp_query-from' => 'HSP-query_start', - 'Hsp_query-to' => 'HSP-query_end', - 'Hsp_hit-from' => 'HSP-hit_start', - 'Hsp_hit-to' => 'HSP-hit_end', - 'Hsp_positive' => 'HSP-conserved', - 'Hsp_identity' => 'HSP-identical', - 'Hsp_mismatches' => 'HSP-mismatches', - 'Hsp_qgapblocks' => 'HSP-query_gapblocks', - 'Hsp_hgapblocks' => 'HSP-hit_gapblocks', - 'Hsp_gaps' => 'HSP-hsp_gaps', - 'Hsp_hitgaps' => 'HSP-hit_gaps', - 'Hsp_querygaps' => 'HSP-query_gaps', - 'Hsp_align-len' => 'HSP-hsp_length', - 'Hsp_query-frame'=> 'HSP-query_frame', - 'Hsp_hit-frame' => 'HSP-hit_frame', +%MAPPING = ( + 'Hsp_bit-score' => 'HSP-bits', + 'Hsp_score' => 'HSP-score', + 'Hsp_evalue' => 'HSP-evalue', + 'Hsp_query-from' => 'HSP-query_start', + 'Hsp_query-to' => 'HSP-query_end', + 'Hsp_hit-from' => 'HSP-hit_start', + 'Hsp_hit-to' => 'HSP-hit_end', + 'Hsp_positive' => 'HSP-conserved', + 'Hsp_identity' => 'HSP-identical', + 'Hsp_mismatches' => 'HSP-mismatches', + 'Hsp_qgapblocks' => 'HSP-query_gapblocks', + 'Hsp_hgapblocks' => 'HSP-hit_gapblocks', + 'Hsp_gaps' => 'HSP-hsp_gaps', + 'Hsp_hitgaps' => 'HSP-hit_gaps', + 'Hsp_querygaps' => 'HSP-query_gaps', + 'Hsp_align-len' => 'HSP-hsp_length', + 'Hsp_query-frame' => 'HSP-query_frame', + 'Hsp_hit-frame' => 'HSP-hit_frame', - 'Hit_id' => 'HIT-name', - 'Hit_len' => 'HIT-length', - 'Hit_accession' => 'HIT-accession', - 'Hit_def' => 'HIT-description', - 'Hit_signif' => 'HIT-significance', - 'Hit_score' => 'HIT-score', - 'Hit_bits' => 'HIT-bits', + 'Hit_id' => 'HIT-name', + 'Hit_len' => 'HIT-length', + 'Hit_accession' => 'HIT-accession', + 'Hit_def' => 'HIT-description', + 'Hit_signif' => 'HIT-significance', + 'Hit_score' => 'HIT-score', + 'Hit_bits' => 'HIT-bits', - 'PSLOutput_program' => 'RESULT-algorithm_name', - 'PSLOutput_version' => 'RESULT-algorithm_version', - 'PSLOutput_query-def'=> 'RESULT-query_name', - 'PSLOutput_query-len'=> 'RESULT-query_length', - 'PSLOutput_query-acc'=> 'RESULT-query_accession', - 'PSLOutput_querydesc'=> 'RESULT-query_description', - 'PSLOutput_db' => 'RESULT-database_name', - 'PSLOutput_db-len' => 'RESULT-database_entries', - 'PSLOutput_db-let' => 'RESULT-database_letters', - ); + 'PSLOutput_program' => 'RESULT-algorithm_name', + 'PSLOutput_version' => 'RESULT-algorithm_version', + 'PSLOutput_query-def' => 'RESULT-query_name', + 'PSLOutput_query-len' => 'RESULT-query_length', + 'PSLOutput_query-acc' => 'RESULT-query_accession', + 'PSLOutput_querydesc' => 'RESULT-query_description', + 'PSLOutput_db' => 'RESULT-database_name', + 'PSLOutput_db-len' => 'RESULT-database_entries', + 'PSLOutput_db-let' => 'RESULT-database_letters', +); use base qw(Bio::SearchIO); @@ -151,18 +149,31 @@ =cut sub _initialize { - my ($self, at args) = @_; + my ( $self, @args ) = @_; $self->SUPER::_initialize(@args); - my ($pname) = $self->_rearrange([qw(PROGRAM_NAME)], - @args); - $self->program_name($pname || $DefaultProgramName); - $self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::GenericResult')); + my ($pname) = $self->_rearrange( [qw(PROGRAM_NAME)], @args ); + $self->program_name( $pname || $DefaultProgramName ); + $self->_eventHandler->register_factory( + 'result', + Bio::Search::Result::ResultFactory->new( + -type => 'Bio::Search::Result::GenericResult' + ) + ); - $self->_eventHandler->register_factory('hit', Bio::Search::Hit::HitFactory->new(-type => 'Bio::Search::Hit::GenericHit')); - $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::PSLHSP')); + $self->_eventHandler->register_factory( + 'hit', + Bio::Search::Hit::HitFactory->new( + -type => 'Bio::Search::Hit::GenericHit' + ) + ); + $self->_eventHandler->register_factory( + 'hsp', + Bio::Search::HSP::HSPFactory->new( + -type => 'Bio::Search::HSP::PSLHSP' + ) + ); } - =head2 next_result Title : next_result @@ -174,136 +185,255 @@ =cut -sub next_result{ - my ($self) = @_; - my ($lastquery,$lasthit); - local $/ = "\n"; - local $_; +sub next_result { + my ($self) = @_; + my ( $lastquery, $lasthit ); + local $/ = "\n"; + local $_; - while( defined ($_ = $self->_readline) ) { - #clear header if exists - if(/^psLayout/){ - #pass over header lines lines - while((!/^\d+\s+\d+\s+/) && defined $_) { - $_ = $self->_readline; - } - } - my ( $matches,$mismatches,$rep_matches,$n_count, - $q_num_insert,$q_base_insert, - $t_num_insert, $t_base_insert, - $strand, $q_name, $q_length, $q_start, - $q_end, $t_name, $t_length,$t_start, $t_end, $block_count, - $block_sizes, $q_starts, $t_starts - ) = split; + while ( defined( $_ = $self->_readline ) ) { - my $score = eval { sprintf "%.2f", ( 100 * ( $matches + $mismatches + $rep_matches ) / $q_length ) }; - next if $@; + #clear header if exists + if (/^psLayout/) { - # this is overall percent identity... - my $percent_id = sprintf "%.2f", ( 100 * ($matches + $rep_matches)/( $matches + $mismatches + $rep_matches ) -); + #pass over header lines lines + while ( ( !/^\d+\s+\d+\s+/ ) && defined $_ ) { + $_ = $self->_readline; + } + } + my ( + $matches, $mismatches, $rep_matches, $n_count, + $q_num_insert, $q_base_insert, $t_num_insert, $t_base_insert, + $strand, $q_name, $q_length, $q_start, + $q_end, $t_name, $t_length, $t_start, + $t_end, $block_count, $block_sizes, $q_starts, + $t_starts + ) = split; - # Remember Jim's code is 0 based - if( defined $lastquery && - $lastquery ne $q_name ) { - $self->end_element({'Name' => 'Hit'}); - $self->end_element({'Name' => 'PSLOutput'}); - $self->_pushback($_); - return $self->end_document; - } elsif( ! defined $lastquery ) { - $self->{'_result_count'}++; - $self->start_element({'Name' => 'PSLOutput'}); - $self->element({'Name' => 'PSLOutput_program', - 'Data' => $self->program_name}); - $self->element({'Name' => 'PSLOutput_query-def', - 'Data' => $q_name}); - $self->element({'Name' => 'PSLOutput_query-len', - 'Data' => $q_length}); - $self->start_element({'Name' => 'Hit'}); - $self->element({'Name' => 'Hit_id', - 'Data' => $t_name}); - $self->element({'Name' => 'Hit_len', - 'Data' => $t_length}); - $self->element({'Name' => 'Hit_score', - 'Data' => $score}); - } elsif( $lasthit ne $t_name ) { - $self->end_element({'Name' => 'Hit'}); - $self->start_element({'Name' => 'Hit'}); - $self->element({'Name' => 'Hit_id', - 'Data' => $t_name}); - $self->element({'Name' => 'Hit_len', - 'Data' => $t_length}); - $self->element({'Name' => 'Hit_score', - 'Data' => $score}); - } - - my $identical = $matches + $rep_matches; - $self->start_element({'Name' => 'Hsp'}); - $self->element({'Name' => 'Hsp_score', - 'Data' => $score}); - $self->element({'Name' => 'Hsp_identity', - 'Data' => $identical}); - $self->element({'Name' => 'Hsp_positive', - 'Data' => $identical}); - $self->element({'Name' => 'Hsp_mismatches', - 'Data' => $mismatches}); - $self->element({'Name' => 'Hsp_gaps', - 'Data' => $q_base_insert + $t_base_insert}); - # query gaps are the number of target inserts and vice-versa - $self->element({'Name' => 'Hsp_querygaps', - 'Data' => $t_base_insert}); - $self->element({'Name' => 'Hsp_hitgaps', - 'Data' => $q_base_insert}); - if( $strand eq '+' ) { - $self->element({'Name' => 'Hsp_query-from', - 'Data' => $q_start + 1}); - $self->element({'Name' => 'Hsp_query-to', - 'Data' => $q_end}); - } else { - $self->element({'Name' => 'Hsp_query-to', - 'Data' => $q_start + 1}); - $self->element({'Name' => 'Hsp_query-from', - 'Data' => $q_end}); - } - my $hsplen = $q_base_insert + $t_base_insert + - abs( $t_end - $t_start) + abs( $q_end - $q_start); @@ Diff output truncated at 10000 characters. @@ From cjfields at illinois.edu Mon Jul 6 01:31:09 2009 From: cjfields at illinois.edu (Chris Fields) Date: Mon, 6 Jul 2009 00:31:09 -0500 Subject: [Bioperl-guts-l] [Bug 2844] Patch to add "revtrans" method toBio::Tools::SeqPattern In-Reply-To: <4A518021.2040804@cornell.edu> References: <200906260123.n5Q1NBHr010363@portal.open-bio.org><55E9EFB6-75D6-467C-B3E2-32580618DF9E@bioperl.org> <1878A553A5E84629B9F66D15DFB662B7@NewLife> <4A50D6A3.90301@cornell.edu> <291DE67D-9F9D-4D3A-B627-A8B75C514CA3@illinois.edu> <4A511FA8.5010101@cornell.edu> <5BD710CF-DFA5-46A5-8A2A-81C13FB34A23@illinois.edu> <4A518021.2040804@cornell.edu> Message-ID: I'll do the co tomorrow. The fastq changes were massive (almost complete rewrite, actually), I'll check it out. BTW, there'll be more fastq mods on the way. chris On Jul 5, 2009, at 11:40 PM, Robert Buels wrote: > Yep, that's probably better. Actually, now that I'm looking at > psl.pm, it needs some work. I'm going to poke at it a bit. > > Also, one of the t/bugzilla/ tests written during the hackathon for > some fastq-related thing is failing with the recent changes to > trunk. Could > you switch to this branch and take a look? > > Do prove -w -I. -rv t/bugzilla/2847/bug.t to see it. > > Rob > > Chris Fields wrote: >> I'll give it a look-over. There was one fix in the initial >> lookover that works but is a bit off: >> + while((!/^\d+\s+\d+\s+/) && defined $_) { >> should be >> + while(defined $_ && (!/^\d+\s+\d+\s+/)) { >> correct? Otherwise the defined check is run after the regex >> against $_. >> chris On Jul 5, 2009, at 4:48 PM, Robert Buels wrote: >>> Chris Fields wrote: >>>> Okay. I still need to co the github repo (July 4th got in the >>>> way) but I will do that today and tomorrow. From what I have >>>> seen I don't think the fixes will be an issue, though. >>> >>> I went ahead and put all the stuff in an svn branch called >>> yapc10hackathon, and deleted the github repo. Have a look there. >>> >>> Rob >>> _______________________________________________ >>> Bioperl-guts-l mailing list >>> Bioperl-guts-l at lists.open-bio.org >>> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l > > > -- > Robert Buels > Bioinformatics Analyst, Sol Genomics Network > Boyce Thompson Institute for Plant Research > Tower Rd > Ithaca, NY 14853 > Tel: 503-889-8539 > rmb32 at cornell.edu > http://www.sgn.cornell.edu > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l From rbuels at dev.open-bio.org Mon Jul 6 01:42:42 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Mon, 6 Jul 2009 01:42:42 -0400 Subject: [Bioperl-guts-l] [15832] bioperl-live/branches/yapc10hackathon/t/bugzilla: squashed some warnings in tests from hackathon. Message-ID: <200907060542.n665ggnc015372@dev.open-bio.org> Revision: 15832 Author: rbuels Date: 2009-07-06 01:42:42 -0400 (Mon, 06 Jul 2009) Log Message: ----------- squashed some warnings in tests from hackathon. should have told people to run with prove -w Modified Paths: -------------- bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/in.t bioperl-live/branches/yapc10hackathon/t/bugzilla/2537/in.t Modified: bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/in.t =================================================================== --- bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/in.t 2009-07-06 05:07:48 UTC (rev 15831) +++ bioperl-live/branches/yapc10hackathon/t/bugzilla/2346/in.t 2009-07-06 05:42:42 UTC (rev 15832) @@ -12,7 +12,7 @@ parse($searchio); -my $searchio = Bio::SearchIO->new( +$searchio = Bio::SearchIO->new( -format => 'exonerate', -file => catfile($FindBin::Bin,'exonerate.output.dontwork'), ); Modified: bioperl-live/branches/yapc10hackathon/t/bugzilla/2537/in.t =================================================================== --- bioperl-live/branches/yapc10hackathon/t/bugzilla/2537/in.t 2009-07-06 05:07:48 UTC (rev 15831) +++ bioperl-live/branches/yapc10hackathon/t/bugzilla/2537/in.t 2009-07-06 05:42:42 UTC (rev 15832) @@ -7,7 +7,7 @@ my $searchio = Bio::SearchIO->new( -format => 'fasta', - -file => catfile($FindBin::Bin,'in.fasta'),2 + -file => catfile($FindBin::Bin,'in.fasta'), ); while ( my $result = $searchio->next_result() ) { From rbuels at dev.open-bio.org Mon Jul 6 01:58:38 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Mon, 6 Jul 2009 01:58:38 -0400 Subject: [Bioperl-guts-l] [15833] bioperl-live/branches/yapc10hackathon: tweaked SearchIO/psl. pm header-skipping logic, moved it out of the main parsing loop. Message-ID: <200907060558.n665wcDv015403@dev.open-bio.org> Revision: 15833 Author: rbuels Date: 2009-07-06 01:58:38 -0400 (Mon, 06 Jul 2009) Log Message: ----------- tweaked SearchIO/psl.pm header-skipping logic, moved it out of the main parsing loop. got rid of a couple of (apparently) unnecessary sprintf calls. added explicit (but minimal) logic that detects invalid query lengths and prevents division by zero. Modified Paths: -------------- bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm bioperl-live/branches/yapc10hackathon/t/SearchIO/psl.t Modified: bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm =================================================================== --- bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm 2009-07-06 05:42:42 UTC (rev 15832) +++ bioperl-live/branches/yapc10hackathon/Bio/SearchIO/psl.pm 2009-07-06 05:58:38 UTC (rev 15833) @@ -179,10 +179,9 @@ Title : next_result Usage : my $result = $parser->next_result Function: Parse the next result from the data stream - Returns : L + Returns : L or undef if no more results Args : none - =cut sub next_result { @@ -191,16 +190,12 @@ local $/ = "\n"; local $_; - while ( defined( $_ = $self->_readline ) ) { + # skip over any header lines + while( defined($_ = $self->_readline) and ! /^\d+\s+\d+\s+/ ) {} + $self->_pushback($_); - #clear header if exists - if (/^psLayout/) { - - #pass over header lines lines - while ( ( !/^\d+\s+\d+\s+/ ) && defined $_ ) { - $_ = $self->_readline; - } - } + # now start the main parsing loop + while ( defined( $_ = $self->_readline ) ) { my ( $matches, $mismatches, $rep_matches, $n_count, $q_num_insert, $q_base_insert, $t_num_insert, $t_base_insert, @@ -210,17 +205,14 @@ $t_starts ) = split; - my $score = eval { - sprintf "%.2f", - ( 100 * ( $matches + $mismatches + $rep_matches ) / $q_length ); - }; - next if $@; + $q_length > 0 or $self->throw("parse error, invalid query length '$q_length'"); + my $score = sprintf( "%.2f", 100 * ( $matches + $mismatches + $rep_matches ) / $q_length ); # this is overall percent identity... - my $percent_id = sprintf "%.2f", - ( 100 * - ( $matches + $rep_matches ) / - ( $matches + $mismatches + $rep_matches ) ); + my $match_total = $matches + $mismatches + $rep_matches; + $match_total > 0 + or $self->throw("parse error, matches + mismatches + rep_matches must be > 0!"); + my $percent_id = sprintf("%.2f", 100 * ( $matches + $rep_matches ) / $match_total ); # Remember Jim's code is 0 based if ( defined $lastquery @@ -455,7 +447,7 @@ if ( my $type = $MODEMAP{$nm} ) { $self->_mode($type); if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "start_%s", lc $type ); + my $func = 'start_'.lc $type; $self->_eventHandler->$func( $data->{'Attributes'} ); } unshift @{ $self->{'_elements'} }, $type; @@ -470,10 +462,10 @@ =head2 end_element - Title : start_element + Title : end_element Usage : $eventgenerator->end_element Function: Handles an end element event - Returns : none + Returns : return value from the associated end_$type event handler Args : hashref with at least 2 keys 'Data' and 'Name' @@ -489,7 +481,7 @@ if ( my $type = $MODEMAP{$nm} ) { if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "end_%s", lc $type ); + my $func = 'end_'.lc $type; $rc = $self->_eventHandler->$func( $self->{'_reporttype'}, $self->{'_values'} ); } Modified: bioperl-live/branches/yapc10hackathon/t/SearchIO/psl.t =================================================================== --- bioperl-live/branches/yapc10hackathon/t/SearchIO/psl.t 2009-07-06 05:42:42 UTC (rev 15832) +++ bioperl-live/branches/yapc10hackathon/t/SearchIO/psl.t 2009-07-06 05:58:38 UTC (rev 15833) @@ -3,13 +3,13 @@ use strict; -BEGIN { +BEGIN { use lib '.'; use Bio::Root::Test; - - test_begin(-tests => 49); - - use_ok('Bio::SearchIO'); + + test_begin(-tests => 52); + + use_ok('Bio::SearchIO'); } my $pslparser = Bio::SearchIO->new(-format => 'psl', @@ -88,3 +88,11 @@ is($q_gapblocks->[0]->[1],14); is($q_gapblocks->[1]->[1],21); is($q_gapblocks->[1]->[0],1152); + + +is( $hit->next_hsp, undef, 'next_hsp should be undef'); +is( $result->next_hit, undef, 'next_hit should be undef'); +TODO: { + local $TODO = "next_result should really return undef, not empty string"; + is( $pslparser->next_result, undef, 'next_result should be undef'); +} From bugzilla-daemon at portal.open-bio.org Mon Jul 6 13:04:56 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 6 Jul 2009 13:04:56 -0400 Subject: [Bioperl-guts-l] [Bug 2764] enhance SwissProt retreival by id using IDTracker In-Reply-To: Message-ID: <200907061704.n66H4uRt021374@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2764 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Target Milestone|1.6.1 point release |1.6.x point release ------- Comment #4 from cjfields at bioperl.org 2009-07-06 13:04 EST ------- Pushing this to an undetermined release in 1.6; unsure how this fits into the scheme of things right now. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Mon Jul 6 15:05:41 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Mon, 6 Jul 2009 15:05:41 -0400 Subject: [Bioperl-guts-l] [15834] bioperl-live/trunk: * write_seq() now generates fastq as expected, not fasta Message-ID: <200907061905.n66J5f1A017283@dev.open-bio.org> Revision: 15834 Author: cjfields Date: 2009-07-06 15:05:40 -0400 (Mon, 06 Jul 2009) Log Message: ----------- * write_seq() now generates fastq as expected, not fasta * write_fastq() delegates to write_seq(), will be deprecated * write_fasta() delegates to a FASTA-based SeqIO stream for consistency * write_qual() delegates to a Qual-based SeqIO stream for consistency Modified Paths: -------------- bioperl-live/trunk/Bio/SeqIO/fastq.pm bioperl-live/trunk/t/SeqIO/fastq.t Modified: bioperl-live/trunk/Bio/SeqIO/fastq.pm =================================================================== --- bioperl-live/trunk/Bio/SeqIO/fastq.pm 2009-07-06 05:58:38 UTC (rev 15833) +++ bioperl-live/trunk/Bio/SeqIO/fastq.pm 2009-07-06 19:05:40 UTC (rev 15834) @@ -115,7 +115,8 @@ Function: returns the next sequence in the stream Returns : Bio::Seq::Quality object Args : NONE - + Status : Stable + =cut sub next_seq { @@ -220,7 +221,10 @@ Usage : $stream->write_seq(@seq) Function: writes the $seq object into the stream Returns : 1 for success and 0 for error - Args : Bio::Seq::Quality or Bio::Seq object + Args : Bio::Seq::Quality + Note : This now conforms to SeqIO spec (module output is same format as + next_seq) + Status : Stable =cut @@ -230,104 +234,88 @@ sub write_seq { my ($self, at seq) = @_; foreach my $seq (@seq) { - my $str = $seq->seq; - my $top = $seq->display_id(); - if ($seq->can('desc') and my $desc = $seq->desc()) { - $desc =~ s/\n//g; - $top .= " $desc"; - } - if(length($str) > 0) { - $str =~ s/(.{1,60})/$1\n/g; - } else { - $str = "\n"; - } - - $self->_print (">",$top,"\n",$str) or return; + unless ($seq->isa("Bio::Seq::Quality")){ + $self->warn("You can't write FASTQ without supplying a Bio::Seq::Quality object! ", ref($seq), "\n"); + next; + } + my $str = $seq->seq; + my @qual = @{$seq->qual}; + my $top = $seq->display_id(); + if ($seq->can('desc') and my $desc = $seq->desc()) { + $desc =~ s/\n//g; + $top .= " $desc"; + } + if(length($str) == 0) { + $str = "\n"; + } + my $qual = join('', map {$self->{phred2chr}->{$_}} @qual); + $self->_print ("\@",$top,"\n",$str,"\n") or return; + $self->_print ("+",$top,"\n",$qual,"\n") or return; } - - $self->flush if $self->_flush_on_write && defined $self->_fh; return 1; } -=head2 write_qual +=head2 write_fastq - Title : write_qual - Usage : $stream->write_qual(@seq) + Title : write_fastq + Usage : $stream->write_fastq(@seq) Function: writes the $seq object into the stream Returns : 1 for success and 0 for error Args : Bio::Seq::Quality object + Status : Deprecated (delegates to write_seq) =cut -sub write_qual { - my ($self, at seq) = @_; - foreach my $seq (@seq) { - unless ($seq->isa("Bio::Seq::Quality")){ - $self->warn("You can write FASTQ without supplying a Bio::Seq::Quality object! ", ref($seq), "\n"); - next; - } - my @qual = @{$seq->qual}; - my $top = $seq->display_id(); - if ($seq->can('desc') and my $desc = $seq->desc()) { - $desc =~ s/\n//g; - $top .= " $desc"; - } - my $qual = "" ; - if(scalar(@qual) > 0) { - my $max = 60; - for (my $q = 0;$q $max){ - $qual .= "\n"; - $max += 60; - } - } - } else { - $qual = "\n"; - } - - $self->_print (">",$top,"\n",$qual,"\n") or return; - } - $self->flush if $self->_flush_on_write && defined $self->_fh; - - return 1; +sub write_fastq { + my ($self, at seq) = @_; + return $self->write_seq(@seq); } -=head2 write_fastq +=head2 write_fasta - Title : write_fastq - Usage : $stream->write_fastq(@seq) + Title : write_fasta + Usage : $stream->write_fasta(@seq) Function: writes the $seq object into the stream Returns : 1 for success and 0 for error - Args : Bio::Seq::Quality object + Args : Bio::Seq object + Note : This method does not currently delegate to Bio::SeqIO::fasta + (maybe it should?). Not sure whether we should keep this as a + convenience method. + Status : Unstable - =cut -sub write_fastq { +sub write_fasta { my ($self, at seq) = @_; - foreach my $seq (@seq) { - unless ($seq->isa("Bio::Seq::Quality")){ - $self->warn("You can't write FASTQ without supplying a Bio::Seq::Quality object! ", ref($seq), "\n"); - next; - } - my $str = $seq->seq; - my @qual = @{$seq->qual}; - my $top = $seq->display_id(); - if ($seq->can('desc') and my $desc = $seq->desc()) { - $desc =~ s/\n//g; - $top .= " $desc"; - } - if(length($str) == 0) { - $str = "\n"; - } - my $qual = join('', map {$self->{phred2chr}->{$_}} @qual); - $self->_print ("\@",$top,"\n",$str,"\n") or return; - $self->_print ("+",$top,"\n",$qual,"\n") or return; - } - return 1; + if (!exists($self->{fasta_proxy})) { + $self->{fasta_proxy} = Bio::SeqIO->new(-format => 'fasta', -fh => $self->_fh); + } + return $self->{fasta_proxy}->write_seq(@seq); } +=head2 write_qual + + Title : write_qual + Usage : $stream->write_qual(@seq) + Function: writes the $seq object into the stream + Returns : 1 for success and 0 for error + Args : Bio::Seq::Quality object + Note : This method does not currently delegate to Bio::SeqIO::qual + (maybe it should?). Not sure whether we should keep this as a + convenience method. + Status : Unstable + +=cut + +sub write_qual { + my ($self, at seq) = @_; + if (!exists($self->{qual_proxy})) { + $self->{qual_proxy} = Bio::SeqIO->new(-format => 'qual', -fh => $self->_fh); + } + + return $self->{qual_proxy}->write_seq(@seq); +} + =head2 variant Title : variant Modified: bioperl-live/trunk/t/SeqIO/fastq.t =================================================================== --- bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-06 05:58:38 UTC (rev 15833) +++ bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-06 19:05:40 UTC (rev 15834) @@ -135,8 +135,7 @@ if ($input_ct == 5) { $test_qual = $seq; } - # this will likely be changed to write_seq, NYI - $out->write_fastq($seq); + $out->write_seq($seq); } is($input_ct, $total, $variant." total"); $out->close; From rbuels at dev.open-bio.org Mon Jul 6 15:26:59 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Mon, 6 Jul 2009 15:26:59 -0400 Subject: [Bioperl-guts-l] [15835] bioperl-live/trunk: merge yapc10hackathon branch into trunk Message-ID: <200907061926.n66JQx5I017476@dev.open-bio.org> Revision: 15835 Author: rbuels Date: 2009-07-06 15:26:59 -0400 (Mon, 06 Jul 2009) Log Message: ----------- merge yapc10hackathon branch into trunk Modified Paths: -------------- bioperl-live/trunk/Bio/SearchIO/exonerate.pm bioperl-live/trunk/Bio/SearchIO/psl.pm bioperl-live/trunk/Bio/Seq/Quality.pm bioperl-live/trunk/Bio/SimpleAlign.pm bioperl-live/trunk/Bio/Tools/SeqPattern.pm bioperl-live/trunk/t/Align/SimpleAlign.t bioperl-live/trunk/t/SearchIO/exonerate.t bioperl-live/trunk/t/SearchIO/psl.t bioperl-live/trunk/t/SeqTools/SeqPattern.t Added Paths: ----------- bioperl-live/trunk/Bio/Tools/SeqPattern/ bioperl-live/trunk/t/SeqTools/Backtranslate.t bioperl-live/trunk/t/bugzilla/ Modified: bioperl-live/trunk/Bio/SearchIO/exonerate.pm =================================================================== --- bioperl-live/trunk/Bio/SearchIO/exonerate.pm 2009-07-06 19:05:40 UTC (rev 15834) +++ bioperl-live/trunk/Bio/SearchIO/exonerate.pm 2009-07-06 19:26:59 UTC (rev 15835) @@ -250,7 +250,7 @@ $self->{'_seencigar'} = 0; $self->{'_vulgar'} = 0; } elsif( s/^vulgar:\s+(\S+)\s+ # query sequence id - (\d+)\s+(\d+)\s+([\-\+])\s+ # query start-end-strand + (\d+)\s+(\d+)\s+([\-\+\.])\s+ # query start-end-strand (\S+)\s+ # target sequence id (\d+)\s+(\d+)\s+([\-\+])\s+ # target start-end-strand (\d+)\s+ # score Modified: bioperl-live/trunk/Bio/SearchIO/psl.pm =================================================================== --- bioperl-live/trunk/Bio/SearchIO/psl.pm 2009-07-06 19:05:40 UTC (rev 15834) +++ bioperl-live/trunk/Bio/SearchIO/psl.pm 2009-07-06 19:26:59 UTC (rev 15835) @@ -2,7 +2,7 @@ # # BioPerl module for Bio::SearchIO::psl # -# Please direct questions and support issues to +# Please direct questions and support issues to # # Cared for by Jason Stajich # @@ -75,10 +75,8 @@ =cut - # Let the code begin... - package Bio::SearchIO::psl; use vars qw(%MAPPING %MODEMAP $DEFAULT_WRITER_CLASS $DefaultProgramName); @@ -87,55 +85,55 @@ use Bio::Search::Hit::HitFactory; use Bio::Search::Result::ResultFactory; -$DefaultProgramName = 'BLAT'; +$DefaultProgramName = 'BLAT'; $DEFAULT_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter'; # mapping of terms to Bioperl hash keys %MODEMAP = ( - 'PSLOutput' => 'result', - 'Result' => 'result', - 'Hit' => 'hit', - 'Hsp' => 'hsp' - ); + 'PSLOutput' => 'result', + 'Result' => 'result', + 'Hit' => 'hit', + 'Hsp' => 'hsp' +); -%MAPPING = ( - 'Hsp_bit-score' => 'HSP-bits', - 'Hsp_score' => 'HSP-score', - 'Hsp_evalue' => 'HSP-evalue', - 'Hsp_query-from' => 'HSP-query_start', - 'Hsp_query-to' => 'HSP-query_end', - 'Hsp_hit-from' => 'HSP-hit_start', - 'Hsp_hit-to' => 'HSP-hit_end', - 'Hsp_positive' => 'HSP-conserved', - 'Hsp_identity' => 'HSP-identical', - 'Hsp_mismatches' => 'HSP-mismatches', - 'Hsp_qgapblocks' => 'HSP-query_gapblocks', - 'Hsp_hgapblocks' => 'HSP-hit_gapblocks', - 'Hsp_gaps' => 'HSP-hsp_gaps', - 'Hsp_hitgaps' => 'HSP-hit_gaps', - 'Hsp_querygaps' => 'HSP-query_gaps', - 'Hsp_align-len' => 'HSP-hsp_length', - 'Hsp_query-frame'=> 'HSP-query_frame', - 'Hsp_hit-frame' => 'HSP-hit_frame', +%MAPPING = ( + 'Hsp_bit-score' => 'HSP-bits', + 'Hsp_score' => 'HSP-score', + 'Hsp_evalue' => 'HSP-evalue', + 'Hsp_query-from' => 'HSP-query_start', + 'Hsp_query-to' => 'HSP-query_end', + 'Hsp_hit-from' => 'HSP-hit_start', + 'Hsp_hit-to' => 'HSP-hit_end', + 'Hsp_positive' => 'HSP-conserved', + 'Hsp_identity' => 'HSP-identical', + 'Hsp_mismatches' => 'HSP-mismatches', + 'Hsp_qgapblocks' => 'HSP-query_gapblocks', + 'Hsp_hgapblocks' => 'HSP-hit_gapblocks', + 'Hsp_gaps' => 'HSP-hsp_gaps', + 'Hsp_hitgaps' => 'HSP-hit_gaps', + 'Hsp_querygaps' => 'HSP-query_gaps', + 'Hsp_align-len' => 'HSP-hsp_length', + 'Hsp_query-frame' => 'HSP-query_frame', + 'Hsp_hit-frame' => 'HSP-hit_frame', - 'Hit_id' => 'HIT-name', - 'Hit_len' => 'HIT-length', - 'Hit_accession' => 'HIT-accession', - 'Hit_def' => 'HIT-description', - 'Hit_signif' => 'HIT-significance', - 'Hit_score' => 'HIT-score', - 'Hit_bits' => 'HIT-bits', + 'Hit_id' => 'HIT-name', + 'Hit_len' => 'HIT-length', + 'Hit_accession' => 'HIT-accession', + 'Hit_def' => 'HIT-description', + 'Hit_signif' => 'HIT-significance', + 'Hit_score' => 'HIT-score', + 'Hit_bits' => 'HIT-bits', - 'PSLOutput_program' => 'RESULT-algorithm_name', - 'PSLOutput_version' => 'RESULT-algorithm_version', - 'PSLOutput_query-def'=> 'RESULT-query_name', - 'PSLOutput_query-len'=> 'RESULT-query_length', - 'PSLOutput_query-acc'=> 'RESULT-query_accession', - 'PSLOutput_querydesc'=> 'RESULT-query_description', - 'PSLOutput_db' => 'RESULT-database_name', - 'PSLOutput_db-len' => 'RESULT-database_entries', - 'PSLOutput_db-let' => 'RESULT-database_letters', - ); + 'PSLOutput_program' => 'RESULT-algorithm_name', + 'PSLOutput_version' => 'RESULT-algorithm_version', + 'PSLOutput_query-def' => 'RESULT-query_name', + 'PSLOutput_query-len' => 'RESULT-query_length', + 'PSLOutput_query-acc' => 'RESULT-query_accession', + 'PSLOutput_querydesc' => 'RESULT-query_description', + 'PSLOutput_db' => 'RESULT-database_name', + 'PSLOutput_db-len' => 'RESULT-database_entries', + 'PSLOutput_db-let' => 'RESULT-database_letters', +); use base qw(Bio::SearchIO); @@ -151,158 +149,283 @@ =cut sub _initialize { - my ($self, at args) = @_; + my ( $self, @args ) = @_; $self->SUPER::_initialize(@args); - my ($pname) = $self->_rearrange([qw(PROGRAM_NAME)], - @args); - $self->program_name($pname || $DefaultProgramName); - $self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::GenericResult')); + my ($pname) = $self->_rearrange( [qw(PROGRAM_NAME)], @args ); + $self->program_name( $pname || $DefaultProgramName ); + $self->_eventHandler->register_factory( + 'result', + Bio::Search::Result::ResultFactory->new( + -type => 'Bio::Search::Result::GenericResult' + ) + ); - $self->_eventHandler->register_factory('hit', Bio::Search::Hit::HitFactory->new(-type => 'Bio::Search::Hit::GenericHit')); - $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::PSLHSP')); + $self->_eventHandler->register_factory( + 'hit', + Bio::Search::Hit::HitFactory->new( + -type => 'Bio::Search::Hit::GenericHit' + ) + ); + $self->_eventHandler->register_factory( + 'hsp', + Bio::Search::HSP::HSPFactory->new( + -type => 'Bio::Search::HSP::PSLHSP' + ) + ); } - =head2 next_result Title : next_result Usage : my $result = $parser->next_result Function: Parse the next result from the data stream - Returns : L + Returns : L or undef if no more results Args : none - =cut -sub next_result{ - my ($self) = @_; - my ($lastquery,$lasthit); - local $/ = "\n"; - local $_; +sub next_result { + my ($self) = @_; + my ( $lastquery, $lasthit ); + local $/ = "\n"; + local $_; - while( defined ($_ = $self->_readline) ) { - #clear header if exists - if(/^psLayout/){ - #pass over header lines lines - while(!/^\d+\s+\d+\s+/) { - $_ = $self->_readline; - } - } - my ( $matches,$mismatches,$rep_matches,$n_count, - $q_num_insert,$q_base_insert, - $t_num_insert, $t_base_insert, - $strand, $q_name, $q_length, $q_start, - $q_end, $t_name, $t_length,$t_start, $t_end, $block_count, - $block_sizes, $q_starts, $t_starts - ) = split; + # skip over any header lines + while( defined($_ = $self->_readline) and ! /^\d+\s+\d+\s+/ ) {} + $self->_pushback($_); - my $score = sprintf "%.2f", ( 100 * ( $matches + $mismatches + $rep_matches ) / $q_length ); + # now start the main parsing loop + while ( defined( $_ = $self->_readline ) ) { + my ( + $matches, $mismatches, $rep_matches, $n_count, + $q_num_insert, $q_base_insert, $t_num_insert, $t_base_insert, + $strand, $q_name, $q_length, $q_start, + $q_end, $t_name, $t_length, $t_start, + $t_end, $block_count, $block_sizes, $q_starts, + $t_starts + ) = split; - # this is overall percent identity... - my $percent_id = sprintf "%.2f", ( 100 * ($matches + $rep_matches)/( $matches + $mismatches + $rep_matches ) -); + $q_length > 0 or $self->throw("parse error, invalid query length '$q_length'"); + my $score = sprintf( "%.2f", 100 * ( $matches + $mismatches + $rep_matches ) / $q_length ); - # Remember Jim's code is 0 based - if( defined $lastquery && - $lastquery ne $q_name ) { - $self->end_element({'Name' => 'Hit'}); - $self->end_element({'Name' => 'PSLOutput'}); - $self->_pushback($_); - return $self->end_document; - } elsif( ! defined $lastquery ) { - $self->{'_result_count'}++; - $self->start_element({'Name' => 'PSLOutput'}); - $self->element({'Name' => 'PSLOutput_program', - 'Data' => $self->program_name}); - $self->element({'Name' => 'PSLOutput_query-def', - 'Data' => $q_name}); - $self->element({'Name' => 'PSLOutput_query-len', - 'Data' => $q_length}); - $self->start_element({'Name' => 'Hit'}); - $self->element({'Name' => 'Hit_id', - 'Data' => $t_name}); - $self->element({'Name' => 'Hit_len', - 'Data' => $t_length}); - $self->element({'Name' => 'Hit_score', - 'Data' => $score}); - } elsif( $lasthit ne $t_name ) { - $self->end_element({'Name' => 'Hit'}); - $self->start_element({'Name' => 'Hit'}); - $self->element({'Name' => 'Hit_id', - 'Data' => $t_name}); - $self->element({'Name' => 'Hit_len', - 'Data' => $t_length}); - $self->element({'Name' => 'Hit_score', - 'Data' => $score}); - } - - my $identical = $matches + $rep_matches; - $self->start_element({'Name' => 'Hsp'}); - $self->element({'Name' => 'Hsp_score', - 'Data' => $score}); - $self->element({'Name' => 'Hsp_identity', - 'Data' => $identical}); - $self->element({'Name' => 'Hsp_positive', @@ Diff output truncated at 10000 characters. @@ From bugzilla-daemon at portal.open-bio.org Mon Jul 6 15:30:35 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 6 Jul 2009 15:30:35 -0400 Subject: [Bioperl-guts-l] [Bug 2847] [Bio::Seq::Quality] get_clear_range on empty clear ranges In-Reply-To: Message-ID: <200907061930.n66JUZSI027058@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2847 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|RESOLVED |REOPENED Resolution|FIXED | Target Milestone|1.6.2 point release |1.6.1 point release ------- Comment #3 from cjfields at bioperl.org 2009-07-06 15:30 EST ------- Reopening due to some test failures after YAPC hackathon merge. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Mon Jul 6 15:37:22 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Mon, 6 Jul 2009 15:37:22 -0400 Subject: [Bioperl-guts-l] [15836] bioperl-live/trunk/t/bugzilla/2847/test_clear_range.fastq: test case wasn't valid FASTQ format; changed to make valid Message-ID: <200907061937.n66JbMkD017531@dev.open-bio.org> Revision: 15836 Author: cjfields Date: 2009-07-06 15:37:22 -0400 (Mon, 06 Jul 2009) Log Message: ----------- test case wasn't valid FASTQ format; changed to make valid Modified Paths: -------------- bioperl-live/trunk/t/bugzilla/2847/test_clear_range.fastq Modified: bioperl-live/trunk/t/bugzilla/2847/test_clear_range.fastq =================================================================== --- bioperl-live/trunk/t/bugzilla/2847/test_clear_range.fastq 2009-07-06 19:26:59 UTC (rev 15835) +++ bioperl-live/trunk/t/bugzilla/2847/test_clear_range.fastq 2009-07-06 19:37:22 UTC (rev 15836) @@ -1,4 +1,4 @@ @test GATAGATAGATAGATAGATAGATAGATTTTAGATTTG - at test ++test ##################################### From bugzilla-daemon at portal.open-bio.org Mon Jul 6 15:39:18 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 6 Jul 2009 15:39:18 -0400 Subject: [Bioperl-guts-l] [Bug 2847] [Bio::Seq::Quality] get_clear_range on empty clear ranges In-Reply-To: Message-ID: <200907061939.n66JdIOt027472@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2847 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|REOPENED |RESOLVED Resolution| |FIXED ------- Comment #4 from cjfields at bioperl.org 2009-07-06 15:39 EST ------- And closing again! Bad test example... -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Mon Jul 6 16:02:51 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Mon, 6 Jul 2009 16:02:51 -0400 Subject: [Bioperl-guts-l] [15837] bioperl-live/trunk/Bio/Tools/CodonTable.pm: untabify and align Message-ID: <200907062002.n66K2pV2017587@dev.open-bio.org> Revision: 15837 Author: cjfields Date: 2009-07-06 16:02:51 -0400 (Mon, 06 Jul 2009) Log Message: ----------- untabify and align Modified Paths: -------------- bioperl-live/trunk/Bio/Tools/CodonTable.pm Modified: bioperl-live/trunk/Bio/Tools/CodonTable.pm =================================================================== --- bioperl-live/trunk/Bio/Tools/CodonTable.pm 2009-07-06 19:37:22 UTC (rev 15836) +++ bioperl-live/trunk/Bio/Tools/CodonTable.pm 2009-07-06 20:02:51 UTC (rev 15837) @@ -33,7 +33,7 @@ # examine codon table print join (' ', "The name of the codon table no.", $myCodonTable->id(4), - "is:", $myCodonTable->name(), "\n"); + "is:", $myCodonTable->name(), "\n"); # print possible codon tables $tables = Bio::Tools::CodonTable->tables; @@ -109,8 +109,8 @@ M Met Methionine F Phe Phenylalanine P Pro Proline - O Pyl Pyrrolysine (22nd amino acid) - U Sec Selenocysteine (21st amino acid) + O Pyl Pyrrolysine (22nd amino acid) + U Sec Selenocysteine (21st amino acid) S Ser Serine T Thr Threonine W Trp Tryptophan @@ -118,7 +118,7 @@ V Val Valine B Asx Aspartic acid or Asparagine Z Glx Glutamine or Glutamic acid - J Xle Isoleucine or Valine (mass spec ambiguity) + J Xle Isoleucine or Valine (mass spec ambiguity) X Xaa Any or unknown amino acid @@ -185,7 +185,7 @@ package Bio::Tools::CodonTable; use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP - %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR); + %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR); use strict; # Object preamble - inherits from Bio::Root::Root @@ -202,89 +202,89 @@ $GAP = '-'; $CODONGAP = $GAP x CODONSIZE; - @NAMES = #id - ( - 'Standard', #1 - 'Vertebrate Mitochondrial',#2 - 'Yeast Mitochondrial',# 3 - 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4 - 'Invertebrate Mitochondrial',#5 - 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 - '', '', - 'Echinoderm Mitochondrial',#9 - 'Euplotid Nuclear',#10 - '"Bacterial"',# 11 - 'Alternative Yeast Nuclear',# 12 - 'Ascidian Mitochondrial',# 13 - 'Flatworm Mitochondrial',# 14 - 'Blepharisma Nuclear',# 15 - 'Chlorophycean Mitochondrial',# 16 - '', '', '', '', - 'Trematode Mitochondrial',# 21 - 'Scenedesmus obliquus Mitochondrial', #22 - 'Thraustochytrium Mitochondrial' #23 - ); + @NAMES = #id + ( + 'Standard', #1 + 'Vertebrate Mitochondrial',#2 + 'Yeast Mitochondrial',# 3 + 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4 + 'Invertebrate Mitochondrial',#5 + 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 + '', '', + 'Echinoderm Mitochondrial',#9 + 'Euplotid Nuclear',#10 + '"Bacterial"',# 11 + 'Alternative Yeast Nuclear',# 12 + 'Ascidian Mitochondrial',# 13 + 'Flatworm Mitochondrial',# 14 + 'Blepharisma Nuclear',# 15 + 'Chlorophycean Mitochondrial',# 16 + '', '', '', '', + 'Trematode Mitochondrial',# 21 + 'Scenedesmus obliquus Mitochondrial', #22 + 'Thraustochytrium Mitochondrial' #23 + ); @TABLES = - qw( - FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG - FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - '' '' - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG - FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG - FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG - FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - '' '' '' '' - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG - FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - ); + qw( + FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG + FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG + FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + '' '' + FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG + FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG + FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG + FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + '' '' '' '' + FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG + FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG + ); @STARTS = - qw( - ---M---------------M---------------M---------------------------- - --------------------------------MMMM---------------M------------ - ----------------------------------MM---------------------------- - --MM---------------M------------MMMM---------------M------------ - ---M----------------------------MMMM---------------M------------ - -----------------------------------M---------------------------- - '' '' - -----------------------------------M---------------------------- - -----------------------------------M---------------------------- - ---M---------------M------------MMMM---------------M------------ - -------------------M---------------M---------------------------- - -----------------------------------M---------------------------- - -----------------------------------M---------------------------- - -----------------------------------M---------------------------- - -----------------------------------M---------------------------- - '' '' '' '' - -----------------------------------M---------------M------------ - -----------------------------------M---------------------------- - --------------------------------M--M---------------M------------ - ); + qw( + ---M---------------M---------------M---------------------------- + --------------------------------MMMM---------------M------------ + ----------------------------------MM---------------------------- + --MM---------------M------------MMMM---------------M------------ + ---M----------------------------MMMM---------------M------------ + -----------------------------------M---------------------------- + '' '' + -----------------------------------M---------------------------- + -----------------------------------M---------------------------- + ---M---------------M------------MMMM---------------M------------ + -------------------M---------------M---------------------------- + -----------------------------------M---------------------------- + -----------------------------------M---------------------------- + -----------------------------------M---------------------------- + -----------------------------------M---------------------------- + '' '' '' '' + -----------------------------------M---------------M------------ + -----------------------------------M---------------------------- + --------------------------------M--M---------------M------------ + ); my @nucs = qw(t c a g); my $x = 0; ($CODONS, $TRCOL) = ({}, {}); for my $i (@nucs) { - for my $j (@nucs) { - for my $k (@nucs) { - my $codon = "$i$j$k"; - $CODONS->{$codon} = $x; - $TRCOL->{$x} = $codon; - $x++; - } - } + for my $j (@nucs) { + for my $k (@nucs) { + my $codon = "$i$j$k"; + $CODONS->{$codon} = $x; + $TRCOL->{$x} = $codon; + $x++; + } } + } %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub(); %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup(); %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2); @@ -297,9 +297,9 @@ my $self = $class->SUPER::new(@args); my($id) = - $self->_rearrange([qw(ID - )], - @args); + $self->_rearrange([qw(ID + )], + @args); $id = 1 if ( ! $id ); $id && $self->id($id); @@ -324,8 +324,8 @@ my ($self,$value) = @_; if( defined $value) { if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') { - $self->warn("Not a valid codon table ID [$value] "); - $value = 0; + $self->warn("Not a valid codon table ID [$value] "); + $value = 0; } $self->{'id'} = $value; } @@ -419,38 +419,38 @@ if ($seq =~ /[^actg]/ ) { #ambiguous chars for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) { my $triplet = substr($seq, $i, CODONSIZE); - if( $triplet eq $CODONGAP ) { - $protein .= $GAP; - } elsif (exists $CODONS->{$triplet}) { - $protein .= substr($TABLES[$id-1], - $CODONS->{$triplet},1); - } else { - $protein .= $self->_translate_ambiguous_codon($triplet); - } - } @@ Diff output truncated at 10000 characters. @@ From cjfields at dev.open-bio.org Mon Jul 6 18:24:17 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Mon, 6 Jul 2009 18:24:17 -0400 Subject: [Bioperl-guts-l] [15838] bioperl-live/trunk/Bio/Tree/Tree.pm: [bug 2869] Message-ID: <200907062224.n66MOHnF017763@dev.open-bio.org> Revision: 15838 Author: cjfields Date: 2009-07-06 18:24:17 -0400 (Mon, 06 Jul 2009) Log Message: ----------- [bug 2869] * Add some documentation to indicate edge case where Tree is gc early, resulting in blank Nodes Modified Paths: -------------- bioperl-live/trunk/Bio/Tree/Tree.pm Modified: bioperl-live/trunk/Bio/Tree/Tree.pm =================================================================== --- bioperl-live/trunk/Bio/Tree/Tree.pm 2009-07-06 20:02:51 UTC (rev 15837) +++ bioperl-live/trunk/Bio/Tree/Tree.pm 2009-07-06 22:24:17 UTC (rev 15838) @@ -29,6 +29,28 @@ This object holds handles to Nodes which make up a tree. +=head1 IMPLEMENTATION NOTE + +This implementation of Bio::Tree::Tree contains Bio::Tree:::NodeI; mainly linked +via the root node. As NodeI can potentially contain circular references (as +nodes will need to refer to both parent and child nodes), Bio::Tree::Tree will +remove those circular references when the object is garbage-collected. This has +some side effects; primarily, one must keep the Tree in scope or have at least +one reference to it if working with nodes. The fix is to count the references to +the nodes and if it is greater than expected retain all of them, but it requires +an additional prereq and thus may not be worth the effort. This only shows up +in minor edge cases, though (see Bug #2869). + +Example of issue: + + # tree is not assigned to a variable, so passes from memory after + # root node is passed + my $root = Bio::TreeIO->new(-format => 'newick', -file => 'foo.txt')->next_tree + ->get_root_node; + + # gets nothing, as all Node links are broken when Tree is garbage-collected above + my @descendents = $root->get_all_Descendents; + =head1 FEEDBACK =head2 Mailing Lists @@ -114,7 +136,7 @@ $self->{'_maxbranchlen'} = 0; $self->_register_for_cleanup(\&cleanup_tree); my ($root,$node,$nodel,$id,$score)= $self->_rearrange([qw(ROOT NODE NODELETE - ID SCORE)], @args); + ID SCORE)], @args); if ($node && ! $root) { $self->throw("Must supply a Bio::Tree::NodeI") unless ref($node) && $node->isa('Bio::Tree::NodeI'); @@ -152,7 +174,6 @@ Returns : boolean Args : on set, new boolean value - =cut sub nodelete{ @@ -226,9 +247,9 @@ if( @_ ) { my $value = shift; if( defined $value && - ! $value->isa('Bio::Tree::NodeI') ) { - $self->warn("Trying to set the root node to $value which is not a Bio::Tree::NodeI"); - return $self->get_root_node; + ! $value->isa('Bio::Tree::NodeI') ) { + $self->warn("Trying to set the root node to $value which is not a Bio::Tree::NodeI"); + return $self->get_root_node; } $self->{'_rootnode'} = $value; } @@ -265,7 +286,7 @@ return unless $node; my $sum = 0; for ( $node->get_all_Descendents ) { - $sum += $_->branch_length || 0; + $sum += $_->branch_length || 0; } return $sum; } @@ -355,7 +376,7 @@ sub set_tag_value{ my ($self,$tag, at values) = @_; if( ! defined $tag || ! scalar @values ) { - $self->warn("cannot call set_tag_value with an undefined value"); + $self->warn("cannot call set_tag_value with an undefined value"); } $self->remove_tag ($tag); map { push @{$self->{'_tags'}->{$tag}}, $_ } @values; @@ -376,7 +397,7 @@ sub add_tag_value{ my ($self,$tag,$value) = @_; if( ! defined $tag || ! defined $value ) { - $self->warn("cannot call add_tag_value with an undefined value"); + $self->warn("cannot call add_tag_value with an undefined value"); } push @{$self->{'_tags'}->{$tag}}, $value; return scalar @{$self->{'_tags'}->{$tag}}; From bugzilla-daemon at portal.open-bio.org Mon Jul 6 18:32:38 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 6 Jul 2009 18:32:38 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907062232.n66MWcff001676@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 ------- Comment #6 from cjfields at bioperl.org 2009-07-06 18:32 EST ------- Added some documentation indicating the bug. There is an obvious workaround as noted; we can make a final decision as how we want to approach this in the next release. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From rbuels at dev.open-bio.org Tue Jul 7 15:46:29 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Tue, 7 Jul 2009 15:46:29 -0400 Subject: [Bioperl-guts-l] [15839] bioperl-live/trunk/t/SeqIO/fastq.t: perltidy on t/SeqIO/fastq.t Message-ID: <200907071946.n67JkTmq021313@dev.open-bio.org> Revision: 15839 Author: rbuels Date: 2009-07-07 15:46:28 -0400 (Tue, 07 Jul 2009) Log Message: ----------- perltidy on t/SeqIO/fastq.t Modified Paths: -------------- bioperl-live/trunk/t/SeqIO/fastq.t Modified: bioperl-live/trunk/t/SeqIO/fastq.t =================================================================== --- bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-06 22:24:17 UTC (rev 15838) +++ bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-07 19:46:28 UTC (rev 15839) @@ -6,11 +6,11 @@ BEGIN { use lib '.'; use Bio::Root::Test; - - test_begin(-tests => 52); - + + test_begin( -tests => 52 ); + use_ok('Bio::SeqIO::fastq'); - use_ok('Bio::Seq::Quality'); + use_ok('Bio::Seq::Quality'); } my $DEBUG = test_debug(); @@ -18,138 +18,160 @@ # original FASTQ (Sanger); data is from NCBI SRA database, which has # all data converted over to Sanger version of FASTQ -my $in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test1_sanger.fastq'), - -format => 'fastq'); -isa_ok($in_qual, 'Bio::SeqIO'); +my $in_qual = Bio::SeqIO->new( + -file => test_input_file( 'fastq', 'test1_sanger.fastq' ), + -format => 'fastq' +); +isa_ok( $in_qual, 'Bio::SeqIO' ); my $qual = $in_qual->next_seq(); -isa_ok($qual, 'Bio::Seq::Quality'); +isa_ok( $qual, 'Bio::Seq::Quality' ); -my @quals = @{$qual->qual()}; -is(@quals, 326, 'number of qual values'); +my @quals = @{ $qual->qual() }; +is( @quals, 326, 'number of qual values' ); -my $qualslice = join(',', at quals[25..35]); -is($qualslice, '30,17,17,16,16,16,16,21,18,18,20', 'qual slice'); +my $qualslice = join( ',', @quals[ 25 .. 35 ] ); +is( $qualslice, '30,17,17,16,16,16,16,21,18,18,20', 'qual slice' ); -is($qual->display_id, 'SRR005406.1'); -is($qual->desc, 'FB9GE3J10GA1VT length=326'); +is( $qual->display_id, 'SRR005406.1' ); +is( $qual->desc, 'FB9GE3J10GA1VT length=326' ); # Solexa, aka Illumina v1.0 # this is the test example from the MAQ script , better examples welcome! -$in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test2_solexa.fastq'), - -format => 'fastq-solexa'); +$in_qual = Bio::SeqIO->new( + -file => test_input_file( 'fastq', 'test2_solexa.fastq' ), + -format => 'fastq-solexa' +); $qual = $in_qual->next_seq(); -isa_ok($qual, 'Bio::Seq::Quality'); +isa_ok( $qual, 'Bio::Seq::Quality' ); - at quals = @{$qual->qual()}; -is(@quals, 25, 'number of qual values'); + at quals = @{ $qual->qual() }; +is( @quals, 25, 'number of qual values' ); -$qualslice = join(',', at quals[12..24]); -is($qualslice, '25,25,25,25,25,25,23,25,23,25,25,19,21', 'qual slice'); +$qualslice = join( ',', @quals[ 12 .. 24 ] ); +is( $qualslice, '25,25,25,25,25,25,23,25,23,25,25,19,21', 'qual slice' ); -is($qual->display_id, 'SLXA-B3_649_FC8437_R1_1_1_610_79'); -is($qual->desc, undef); +is( $qual->display_id, 'SLXA-B3_649_FC8437_R1_1_1_610_79' ); +is( $qual->desc, undef ); # Illumina v1.3 -$in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test3_illumina.fastq'), - -format => 'fastq-illumina'); +$in_qual = Bio::SeqIO->new( + -file => test_input_file( 'fastq', 'test3_illumina.fastq' ), + -format => 'fastq-illumina' +); $qual = $in_qual->next_seq(); -isa_ok($qual, 'Bio::Seq::Quality'); +isa_ok( $qual, 'Bio::Seq::Quality' ); - at quals = @{$qual->qual()}; -is(@quals, 25, 'number of qual values'); + at quals = @{ $qual->qual() }; +is( @quals, 25, 'number of qual values' ); -$qualslice = join(',', at quals[12..22]); -is($qualslice, '24,20,20,19,21,24,19,19,24,11,20', 'qual slice'); +$qualslice = join( ',', @quals[ 12 .. 22 ] ); +is( $qualslice, '24,20,20,19,21,24,19,19,24,11,20', 'qual slice' ); -is($qual->display_id, 'FC12044_91407_8_200_406_24'); -is($qual->desc, undef); +is( $qual->display_id, 'FC12044_91407_8_200_406_24' ); +is( $qual->desc, undef ); # bug 2335 -$in_qual = Bio::SeqIO->new('-file' => test_input_file('fastq','bug2335.fastq'), - '-format' => 'fastq-sanger'); +$in_qual = Bio::SeqIO->new( + '-file' => test_input_file( 'fastq', 'bug2335.fastq' ), + '-format' => 'fastq-sanger' +); $qual = $in_qual->next_seq(); -isa_ok($qual, 'Bio::Seq::Quality'); +isa_ok( $qual, 'Bio::Seq::Quality' ); - at quals = @{$qual->qual()}; + at quals = @{ $qual->qual() }; -is(@quals, 111, 'number of qual values'); +is( @quals, 111, 'number of qual values' ); -$qualslice = join(',', at quals[0..10]); -is($qualslice, '31,23,32,23,31,22,27,28,32,24,25', 'qual slice'); +$qualslice = join( ',', @quals[ 0 .. 10 ] ); +is( $qualslice, '31,23,32,23,31,22,27,28,32,24,25', 'qual slice' ); -is($qual->display_id, 'DS6BPQV01A2G0A'); -is($qual->desc, undef); +is( $qual->display_id, 'DS6BPQV01A2G0A' ); +is( $qual->desc, undef ); # raw data -$in_qual = Bio::SeqIO->new(-file => test_input_file('fastq','test3_illumina.fastq'), - -variant => 'illumina', - -format => 'fastq'); +$in_qual = Bio::SeqIO->new( + -file => test_input_file( 'fastq', 'test3_illumina.fastq' ), + -variant => 'illumina', + -format => 'fastq' +); $qual = $in_qual->next_dataset(); -isa_ok($qual, 'HASH'); -is($qual->{-seq}, 'GTTAGCTCCCACCTTAAGATGTTTA'); -is($qual->{-raw_quality}, 'SXXTXXXXXXXXXTTSUXSSXKTMQ'); -is($qual->{-id}, 'FC12044_91407_8_200_406_24'); -is($qual->{-desc}, ''); -is($qual->{-descriptor}, 'FC12044_91407_8_200_406_24'); -is(join(',',@{$qual->{-qual}}[0..10]), '19,24,24,20,24,24,24,24,24,24,24'); +isa_ok( $qual, 'HASH' ); +is( $qual->{-seq}, 'GTTAGCTCCCACCTTAAGATGTTTA' ); +is( $qual->{-raw_quality}, 'SXXTXXXXXXXXXTTSUXSSXKTMQ' ); +is( $qual->{-id}, 'FC12044_91407_8_200_406_24' ); +is( $qual->{-desc}, '' ); +is( $qual->{-descriptor}, 'FC12044_91407_8_200_406_24' ); +is( + join( ',', @{ $qual->{-qual} }[ 0 .. 10 ] ), + '19,24,24,20,24,24,24,24,24,24,24' +); # can this be used in a constructor? my $qualobj = Bio::Seq::Quality->new(%$qual); -is($qualobj->seq, 'GTTAGCTCCCACCTTAAGATGTTTA'); -is($qualobj->display_id, 'FC12044_91407_8_200_406_24'); -is($qualobj->desc, undef); -is(join(',',@{$qualobj->qual}[0..10]), '19,24,24,20,24,24,24,24,24,24,24'); +is( $qualobj->seq, 'GTTAGCTCCCACCTTAAGATGTTTA' ); +is( $qualobj->display_id, 'FC12044_91407_8_200_406_24' ); +is( $qualobj->desc, undef ); +is( + join( ',', @{ $qualobj->qual }[ 0 .. 10 ] ), + '19,24,24,20,24,24,24,24,24,24,24' +); # round trip tests for write_fastq my %format = ( - 'fastq-sanger' => ['test1_sanger.fastq', 250], - 'fastq-solexa' => ['test2_solexa.fastq', 5], - 'fastq-illumina' => ['test3_illumina.fastq', 25] - ); + 'fastq-sanger' => [ 'test1_sanger.fastq', 250 ], + 'fastq-solexa' => [ 'test2_solexa.fastq', 5 ], + 'fastq-illumina' => [ 'test3_illumina.fastq', 25 ] +); -while (my ($variant, $data) = each %format) { - my $outfile = "$variant.fastq"; - my ($file, $total) = @$data; - $file = test_input_file('fastq', $file); - my $in = Bio::SeqIO->new(-format => $variant, - -file => $file); - my $out = Bio::SeqIO->new(-format => $variant, - -file => ">$outfile"); - my ($input_ct, $round_trip) = (0,0); - my $test_qual; - while (my $seq = $in->next_seq) { - $input_ct++; - if ($input_ct == 5) { - $test_qual = $seq; - } - $out->write_seq($seq); - } - is($input_ct, $total, $variant." total"); - $out->close; - my $new_in = Bio::SeqIO->new(-format => $variant, - -file => $outfile); - while (my $seq = $new_in->next_seq) { - $round_trip++; - if ($round_trip == 5) { - for my $att (qw(seq display_id desc)) { - is($seq->$att, $test_qual->$att, "Testing $att"); - } - is_deeply($seq->qual, $test_qual->qual, "Testing qual"); - } - } - is($round_trip, $total, $variant." total"); +while ( my ( $variant, $data ) = each %format ) { + my $outfile = "$variant.fastq"; + my ( $file, $total ) = @$data; + $file = test_input_file( 'fastq', $file ); + my $in = Bio::SeqIO->new( + -format => $variant, + -file => $file + ); + my $out = Bio::SeqIO->new( + -format => $variant, + -file => ">$outfile" + ); + my ( $input_ct, $round_trip ) = ( 0, 0 ); + my $test_qual; + while ( my $seq = $in->next_seq ) { + $input_ct++; + if ( $input_ct == 5 ) { + $test_qual = $seq; + } + $out->write_seq($seq); + } + is( $input_ct, $total, $variant . " total" ); + $out->close; + my $new_in = Bio::SeqIO->new( + -format => $variant, + -file => $outfile + ); + while ( my $seq = $new_in->next_seq ) { + $round_trip++; + if ( $round_trip == 5 ) { + for my $att (qw(seq display_id desc)) { + is( $seq->$att, $test_qual->$att, "Testing $att" ); + } + is_deeply( $seq->qual, $test_qual->qual, "Testing qual" ); + } + } + is( $round_trip, $total, $variant . " total" ); } From rbuels at dev.open-bio.org Tue Jul 7 15:48:15 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Tue, 7 Jul 2009 15:48:15 -0400 Subject: [Bioperl-guts-l] [15840] bioperl-live/trunk/t/SeqIO/fastq.t: fixed stray output file emanating from fastq.t. Message-ID: <200907071948.n67JmFT0021344@dev.open-bio.org> Revision: 15840 Author: rbuels Date: 2009-07-07 15:48:15 -0400 (Tue, 07 Jul 2009) Log Message: ----------- fixed stray output file emanating from fastq.t. was leaving garbage files around in root dir when running test Modified Paths: -------------- bioperl-live/trunk/t/SeqIO/fastq.t Modified: bioperl-live/trunk/t/SeqIO/fastq.t =================================================================== --- bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-07 19:46:28 UTC (rev 15839) +++ bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-07 19:48:15 UTC (rev 15840) @@ -137,7 +137,7 @@ ); while ( my ( $variant, $data ) = each %format ) { - my $outfile = "$variant.fastq"; + my $outfile = test_output_file(); my ( $file, $total ) = @$data; $file = test_input_file( 'fastq', $file ); my $in = Bio::SeqIO->new( From cjfields at illinois.edu Tue Jul 7 18:28:54 2009 From: cjfields at illinois.edu (Chris Fields) Date: Tue, 7 Jul 2009 17:28:54 -0500 Subject: [Bioperl-guts-l] [15840] bioperl-live/trunk/t/SeqIO/fastq.t: fixed stray output file emanating from fastq.t. In-Reply-To: <200907071948.n67JmFT0021344@dev.open-bio.org> References: <200907071948.n67JmFT0021344@dev.open-bio.org> Message-ID: <6BF06C91-3F04-483E-B7FA-895F2478E088@illinois.edu> Thanks, missed that one. chris On Jul 7, 2009, at 2:48 PM, Robert Buels wrote: > Revision: 15840 > Author: rbuels > Date: 2009-07-07 15:48:15 -0400 (Tue, 07 Jul 2009) > > Log Message: > ----------- > fixed stray output file emanating from fastq.t. was leaving garbage > files around in root dir when running test > > Modified Paths: > -------------- > bioperl-live/trunk/t/SeqIO/fastq.t > > Modified: bioperl-live/trunk/t/SeqIO/fastq.t > =================================================================== > --- bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-07 19:46:28 UTC (rev > 15839) > +++ bioperl-live/trunk/t/SeqIO/fastq.t 2009-07-07 19:48:15 UTC (rev > 15840) > @@ -137,7 +137,7 @@ > ); > > while ( my ( $variant, $data ) = each %format ) { > - my $outfile = "$variant.fastq"; > + my $outfile = test_output_file(); > my ( $file, $total ) = @$data; > $file = test_input_file( 'fastq', $file ); > my $in = Bio::SeqIO->new( > > > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l From bugzilla-daemon at portal.open-bio.org Wed Jul 8 13:46:26 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 8 Jul 2009 13:46:26 -0400 Subject: [Bioperl-guts-l] [Bug 2875] New: Bio::Align::AlignI POD confusion: 0- vs 1-indexed positions Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2875 Summary: Bio::Align::AlignI POD confusion: 0- vs 1-indexed positions Product: BioPerl Version: main-trunk Platform: HP URL: http://code.open- bio.org/svnweb/index.cgi/bioperl/view/bioperl- live/trunk/Bio/Align/AlignI.pm OS/Version: Linux Status: NEW Severity: minor Priority: P2 Component: Documentation AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: charles.tilford at bms.com Minor POD bug in Bio::Align::AlignI can lead to confusion over 0- vs 1-indexed positions: Title : column_from_residue_number Seq1/91-97 AC..DEF.GH Seq2/24-30 ACGG.RTY.. Seq3/43-51 AC.DDEFGHI column_from_residue_number( "Seq1", 94 ) returns 5. column_from_residue_number( "Seq2", 25 ) returns 2. column_from_residue_number( "Seq3", 50 ) returns 9. The first code example shows a return value of 5, but if the method is returning 1-indexed column numbers then it should return 6 (character "E"). The other two examples are appropriate for 1-indexed positions. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From maj at dev.open-bio.org Wed Jul 8 23:17:45 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 8 Jul 2009 23:17:45 -0400 Subject: [Bioperl-guts-l] [15841] bioperl-live/trunk/Bio/Align/AlignI.pm: bug 2875--thanks Charles Message-ID: <200907090317.n693Hjp0026300@dev.open-bio.org> Revision: 15841 Author: maj Date: 2009-07-08 23:17:44 -0400 (Wed, 08 Jul 2009) Log Message: ----------- bug 2875--thanks Charles Modified Paths: -------------- bioperl-live/trunk/Bio/Align/AlignI.pm Modified: bioperl-live/trunk/Bio/Align/AlignI.pm =================================================================== --- bioperl-live/trunk/Bio/Align/AlignI.pm 2009-07-07 19:48:15 UTC (rev 15840) +++ bioperl-live/trunk/Bio/Align/AlignI.pm 2009-07-09 03:17:44 UTC (rev 15841) @@ -735,7 +735,7 @@ Seq2/24-30 ACGG.RTY.. Seq3/43-51 AC.DDEFGHI - column_from_residue_number( "Seq1", 94 ) returns 5. + column_from_residue_number( "Seq1", 94 ) returns 6. column_from_residue_number( "Seq2", 25 ) returns 2. column_from_residue_number( "Seq3", 50 ) returns 9. From bugzilla-daemon at portal.open-bio.org Wed Jul 8 23:18:19 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 8 Jul 2009 23:18:19 -0400 Subject: [Bioperl-guts-l] [Bug 2875] Bio::Align::AlignI POD confusion: 0- vs 1-indexed positions In-Reply-To: Message-ID: <200907090318.n693IJuG012858@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2875 maj at fortinbras.us changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #1 from maj at fortinbras.us 2009-07-08 23:18 EST ------- Pod fixed-- thanks Charles! MAJ -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 9 13:30:48 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Jul 2009 13:30:48 -0400 Subject: [Bioperl-guts-l] [Bug 2876] New: cdd_search with perl (remote blast) ? Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2876 Summary: cdd_search with perl (remote blast) ? Product: BioPerl Version: unspecified Platform: PC OS/Version: Windows XP Status: NEW Severity: normal Priority: P2 Component: Unclassified AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: jonas_schaer at gmx.de Hi there. Well, here is my problem: I wrote a perlskript which runs a remote blast search of a single sequence. Now I want to include a search for conserved domains (put-parameter: cdd_search)(db: CDD, prog: 'rpsblast'). my skript so far (just for blasting a sequence against 'nr'): ################################################################################# #!/usr/bin/perl use Bio::Seq::SeqFactory; use Bio::Tools::Run::RemoteBlast; use strict; my @blast_report; my $prog = 'blastp'; my $db = 'nr'; my $e_val= '1e-10'; my $self; #my $e_val= '10'; my @params = ( '-prog' => $prog, '-data' => $db, '-expect' => $e_val, '-readmethod' => 'SearchIO' ); my $factory = Bio::Tools::Run::RemoteBlast->new(@params); $Bio::Tools::Run::RemoteBlast::HEADER{'GAPCOSTS'} = '11 1'; $Bio::Tools::Run::RemoteBlast::HEADER{'MAX_NUM_SEQ'} = '100'; $Bio::Tools::Run::RemoteBlast::HEADER{'EXPECT'} = '10'; $Bio::Tools::Run::RemoteBlast::HEADER{'COMPOSITION_BASED_STATISTICS'} = '2'; my %put = ( WORD_SIZE => 3, HITLIST_SIZE => 100, THRESHOLD => 11, # COMPOSITION_BASED_STATISTICS =>2, FILTER => 'R', #''L'' for Low Complexity, ''R'' for Human Repeats, ''m'' for Mask for Lookup GENETIC_CODE => 1 ); for my $putName (keys %put) { $factory->submit_parameter($putName,$put{$putName}); } my $blast_seq='MGSSSVGTYHLLLVLMGAGGEQQAVQAGAEVASTEQVDGSGMAANSRGSTSGSEQPPRDSDLGLLRSLLDVAGVDRTALEVKLLALAEAGAEMPPAQDSQATAAGVVATLTSVYRQQVARAWHERDDNAFRQAHQNTAMATGPDPDDEYE'; #$v is just to turn on and off the messages my $v = 1; my $seqbuilder = Bio::Seq::SeqFactory->new('-type' => 'Bio::PrimarySeq'); my $seq = $seqbuilder->create(-seq =>$blast_seq, -display_id => "$blast_seq"); my $filename='temp2.out'; my $r = $factory->submit_blast($seq); print STDERR "waiting..." if( $v > 0 ); while ( my @rids = $factory->each_rid ) { foreach my $rid ( @rids ) { my $rc = $factory->retrieve_blast($rid); if( !ref($rc) ) { if( $rc < 0 ) { $factory->remove_rid($rid); } print STDERR "." if ( $v > 0 ); } else { my $result = $rc->next_result(); $factory->save_output($filename); $factory->remove_rid($rid); print "\nQuery Name: ", $result->query_name(), "\n"; while ( my $hit = $result->next_hit ) { next unless ( $v > 0); print "\thit name is ", $hit->name, "\n"; while( my $hsp = $hit->next_hsp ) { print "\t\tscore is ", $hsp->score, "\n"; } } } } } @blast_report = get_file_data ($filename); return @blast_report; ################################################################################# Switching the put-parameters "prog" to 'rpsblast' and "db" to 'CDD' doesn't work: ------------- EXCEPTION: Bio::Root::Exception ------------- MSG: Value rpsblast for PUT parameter PROGRAM does not match expression t?blast [pnx]. Rejecting. STACK: Error::throw STACK: Bio::Root::Root::throw C:/Perl/site/lib/Bio/Root/Root.pm:359 STACK: Bio::Tools::Run::RemoteBlast::submit_parameter C:/Perl/site/lib/Bio/Tools /Run/RemoteBlast.pm:329 STACK: Bio::Tools::Run::RemoteBlast::new C:/Perl/site/lib/Bio/Tools/Run/RemoteBl ast.pm:257 STACK: blast_a_seqcdd.pm:14 ----------------------------------------------------------- I understand I probably have to write a new routine to do a cdd-blast ... I hope it's ok to quote from chris' answer (thank u btw :) and thanks to malcom, too ): chris wrote: "I'm not sure, but I think adding this in will take a little work (we'll need to catch the RID returned, which I'm fairly sure will require some modifications to checking the returned output). I would also have to look at the RemoteBlast API to see how this would fit in (I'm assuming we could either lump it in with other returned RIDs or create a new method for that)." Can somebody help me? thanks in advance! regards, jonas -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 9 14:28:33 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Jul 2009 14:28:33 -0400 Subject: [Bioperl-guts-l] [Bug 2877] New: [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2877 Summary: [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() Product: BioPerl Version: 1.6 branch Platform: PC OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Core Components AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: tristan.lefebure at gmail.com CC: tristan.lefebure at gmail.com Hello, I have been bumping into problems while rerooting trees that contained bootstrap scores. Basically, after re-rooting the tree, some scores end-up at the wrong place (i.e. node) and some nodes lose their score. I found this thread from Bank Beszter, back in 2007, that exactly explains the same problems: http://lists.open-bio.org/pipermail/bioperl-l/2007- May/025599.html Below I paste a script that reproduces the bug and implements the fix that Bank described (at least this is my understanding, and it works on this example): #! /usr/bin/perl use strict; use warnings; use Bio::TreeIO; my $in = Bio::TreeIO->new(-format => 'newick', -fh => \*DATA, -internal_node_id => 'bootstrap'); my $out = Bio::TreeIO->new(-format => 'newick', -file => ">out.tree"); while( my $t = $in->next_tree ){ my $old_root = $t->get_root_node(); my ($b) = $t->find_node(-id =>"B"); my $b_anc = $b->ancestor; $out->write_tree($t); #reroot with B -> wrong, and the tree is kind of weird $t->reroot($b); $out->write_tree($t); #reroot with B ancestor -> wrong $t->reroot($b_anc); $out->write_tree($t); #a fix, following Bank Beszteri description my $node = $old_root; while (my $anc_node = $node->ancestor) { $node->bootstrap($anc_node->bootstrap()); $anc_node->bootstrap(''); $node = $anc_node; } $out->write_tree($t); #->good this time } __DATA__ (A:52,(B:46,C:50)68:11,D:70); Here is the output: (A:52,(B:46,C:50)68:11,D:70); ((C:50,(A:52,D:70):11)68:46)B; (B:46,C:50,(A:52,D:70):11)68; (B:46,C:50,(A:52,D:70)68:11); Tree #2 and #3 have the score 68 moved to the wrong node, while tree #4 is OK. (BTW tree #2 is really weird, except if B, is the real ancestor (a fossil ?), it really does not make much sense to me). My understanding here is that the problem is linked to the well-known difficulty to differentiate node from branch labels in newick trees. Bootstrap scores are branch attributes not node attributes, but since Bio::TreeI has no branch/edge/bipartition object they are attached to a node, and in fact reflects the bootstrap score of the ancestral branch leading to that node. Troubles naturally come when you are dealing with an unrooted tree or reroot a tree: a child can become an ancestor, and, if the bootstrap scores is not moved from the old child to the new child, it will end up attached at the wrong place (i.e. wrong node). I see several possible fix to that: 1- incorporate Bank's fix into the root() method. I.e. if there is bootstrap score, after re-rooting, the one on the old to new ancestor path, should be moved to the right node. 2- Modify the way trees are stored in bioperl to incorporate branch/edge/bipartition object, and move the bootstrap scores to them. That won't be easy and will break many things... What do you think? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 9 15:14:13 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Jul 2009 15:14:13 -0400 Subject: [Bioperl-guts-l] [Bug 2876] cdd_search with perl (remote blast) ? In-Reply-To: Message-ID: <200907091914.n69JEDjf020110@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2876 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Severity|normal |enhancement Target Milestone|--- |1.6.x point release ------- Comment #1 from cjfields at bioperl.org 2009-07-09 15:14 EST ------- Moving to an enhancement request and tentatively planning for the 1.6 release series when tuits are available. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 9 15:39:01 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Jul 2009 15:39:01 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907091939.n69Jd1Dm021223@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 maj at fortinbras.us changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |ASSIGNED ------- Comment #1 from maj at fortinbras.us 2009-07-09 15:39 EST ------- Tristan's insight " I just add a quick look at the reroot() function of TreeFunctionsI, and it looks like that what should be done for the bootstrap scores is what is already done for the branch lengths. See this loop starting line 954: # reverse the ancestor & children pointers my $former_anc = $tmp_node->ancestor; my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node); for (my $i = 0; $i < @path_from_oldroot - 1; $i++) { my $current = $path_from_oldroot[$i]; my $next = $path_from_oldroot[$i + 1]; $current->remove_Descendent($next); $current->branch_length($next->branch_length); $next->add_Descendent($current); } It makes sense to me to treat bootstrap and branch lenght in a similar way: the branch lengths are stored inside the node object, but as the bootstrap, they really are branch attributes... Nope? -Tristan " -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 9 15:39:27 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Jul 2009 15:39:27 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907091939.n69JdROf021260@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 maj at fortinbras.us changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |bioperl-guts-l at bioperl.org -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Thu Jul 9 16:46:36 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Thu, 9 Jul 2009 16:46:36 -0400 Subject: [Bioperl-guts-l] [15842] bioperl-live/trunk/Bio/TreeIO.pm: * POD fix per Jay Hannah Message-ID: <200907092046.n69KkaLp030158@dev.open-bio.org> Revision: 15842 Author: cjfields Date: 2009-07-09 16:46:35 -0400 (Thu, 09 Jul 2009) Log Message: ----------- * POD fix per Jay Hannah * untabify Modified Paths: -------------- bioperl-live/trunk/Bio/TreeIO.pm Modified: bioperl-live/trunk/Bio/TreeIO.pm =================================================================== --- bioperl-live/trunk/Bio/TreeIO.pm 2009-07-09 03:17:44 UTC (rev 15841) +++ bioperl-live/trunk/Bio/TreeIO.pm 2009-07-09 20:46:35 UTC (rev 15842) @@ -21,9 +21,9 @@ { use Bio::TreeIO; my $treeio = Bio::TreeIO->new('-format' => 'newick', - '-file' => 'globin.dnd'); + '-file' => 'globin.dnd'); while( my $tree = $treeio->next_tree ) { - print "Tree is ", $tree->size, "\n"; + print "Tree is ", $tree->number_nodes, "\n"; } } @@ -121,21 +121,21 @@ # or do we want to call SUPER on an object if $caller is an # object? if( $class =~ /Bio::TreeIO::(\S+)/ ) { - my ($self) = $class->SUPER::new(@args); - $self->_initialize(@args); - return $self; + my ($self) = $class->SUPER::new(@args); + $self->_initialize(@args); + return $self; } else { - my %param = @args; - @param{ map { lc $_ } keys %param } = values %param; # lowercase keys - my $format = $param{'-format'} || - $class->_guess_format( $param{'-file'} || $ARGV[0] ) || - 'newick'; - $format = "\L$format"; # normalize capitalization to lower case - - # normalize capitalization - return unless( $class->_load_format_module($format) ); - return "Bio::TreeIO::$format"->new(@args); + my %param = @args; + @param{ map { lc $_ } keys %param } = values %param; # lowercase keys + my $format = $param{'-format'} || + $class->_guess_format( $param{'-file'} || $ARGV[0] ) || + 'newick'; + $format = "\L$format"; # normalize capitalization to lower case + + # normalize capitalization + return unless( $class->_load_format_module($format) ); + return "Bio::TreeIO::$format"->new(@args); } } @@ -186,7 +186,7 @@ my ($self,$handler) = @_; return if( ! $handler ); if( ! $handler->isa('Bio::Event::EventHandlerI') ) { - $self->warn("Ignoring request to attach handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI'); + $self->warn("Ignoring request to attach handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI'); } $self->{'_handler'} = $handler; return; @@ -214,12 +214,12 @@ my $internal_node_id; $self->{'internal_node_id'} = INTERNAL_NODE_ID; ($self->{'newline_each_node'},$internal_node_id) = $self->_rearrange - ([qw(NEWLINE_EACH_NODE INTERNAL_NODE_ID)], at args); + ([qw(NEWLINE_EACH_NODE INTERNAL_NODE_ID)], at args); # initialize the IO part $self->_initialize_io(@args); $self->attach_EventHandler(Bio::TreeIO::TreeEventBuilder->new - (-verbose => $self->verbose(), @args)); + (-verbose => $self->verbose(), @args)); $self->internal_node_id($internal_node_id) if defined $internal_node_id; } @@ -289,14 +289,14 @@ my $self = shift; my $val = shift; if( defined $val ) { - if( $val =~ /^b/i ) { - $val = 'bootstrap'; - } elsif( $val =~ /^i/ ) { - $val = 'id'; - } else { - $self->warn("Unknown value $val for internal_node_id not resetting value\n"); - } - return $self->{'internal_node_id'} = $val; + if( $val =~ /^b/i ) { + $val = 'bootstrap'; + } elsif( $val =~ /^i/ ) { + $val = 'id'; + } else { + $self->warn("Unknown value $val for internal_node_id not resetting value\n"); + } + return $self->{'internal_node_id'} = $val; } return $self->{'internal_node_id'}; } From bugzilla-daemon at portal.open-bio.org Thu Jul 9 20:12:06 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Jul 2009 20:12:06 -0400 Subject: [Bioperl-guts-l] [Bug 2878] New: update PLATFORMS file Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2878 Summary: update PLATFORMS file Product: BioPerl Version: main-trunk Platform: All URL: http://bioperl.open-bio.org/SRC/bioperl-live/PLATFORMS OS/Version: Mac OS Status: NEW Severity: normal Priority: P2 Component: Documentation AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: koenvanderdrift at gmail.com The PLATFORMS document contains a *very* outdated link on how to install bioperl on Macs. Please remove this link: "Steve Cannon has made available Bioperl OS X installation directions and notes online at the following URL: http://www.tc.umn.edu/~cann0010/Bioperl_OSX_install.html" -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 9 21:18:26 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Jul 2009 21:18:26 -0400 Subject: [Bioperl-guts-l] [Bug 2878] update PLATFORMS file In-Reply-To: Message-ID: <200907100118.n6A1IQaR031812@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2878 ------- Comment #1 from cjfields at bioperl.org 2009-07-09 21:18 EST ------- I think we could actually remove this file completely. It hasn't been updated in quite a while and any information it contains would probably serve a better purpose elsewhere. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Fri Jul 10 03:37:34 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Jul 2009 03:37:34 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907100737.n6A7bYgL011353@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 ------- Comment #7 from jiandingzhe at msn.com 2009-07-10 03:37 EST ------- (In reply to comment #5) > This is happening b/c, in your code example, the returned Tree is > garbage-collected (and with it all node references to one another). Since you > have assigned the root node to a variable, it is retained but everything else > disappears. Thus you only end up with the root node and nothing else. > > I can leave this open for now, but it isn't a major bug if you call in the > right context. We should (at the least) add a note about this to > Bio::Tree::Tree if it isn't already present. > Actually, I was trapped by this bug because it is usual to use SearchIO in this linked-method-calling way: my $result = Bio::SearchIO->new(-file=>$somefile)->next_result; Anyway, thanks for a lot! -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Fri Jul 10 08:02:26 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Jul 2009 08:02:26 -0400 Subject: [Bioperl-guts-l] [Bug 2869] Bio::Tree, fail to get children of root node through some way In-Reply-To: Message-ID: <200907101202.n6AC2QVf021601@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2869 ------- Comment #8 from cjfields at bioperl.org 2009-07-10 08:02 EST ------- (In reply to comment #7) > > Actually, I was trapped by this bug because it is usual to use SearchIO in this > linked-method-calling way: > > my $result = Bio::SearchIO->new(-file=>$somefile)->next_result; > > Anyway, thanks for a lot! Even though this is particular to TreeIO/Bio::Tree::Tree, I wouldn't consider the SearchIO idiom good for the same reason. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Fri Jul 10 11:14:51 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 10 Jul 2009 11:14:51 -0400 Subject: [Bioperl-guts-l] [15843] bioperl-live/trunk/PLATFORMS: [bug 2878] Message-ID: <200907101514.n6AFEp8m032714@dev.open-bio.org> Revision: 15843 Author: cjfields Date: 2009-07-10 11:14:50 -0400 (Fri, 10 Jul 2009) Log Message: ----------- [bug 2878] * remove PLATFORMS from the distribution (file was redundant and out-of-date) Removed Paths: ------------- bioperl-live/trunk/PLATFORMS Deleted: bioperl-live/trunk/PLATFORMS =================================================================== --- bioperl-live/trunk/PLATFORMS 2009-07-09 20:46:35 UTC (rev 15842) +++ bioperl-live/trunk/PLATFORMS 2009-07-10 15:14:50 UTC (rev 15843) @@ -1,55 +0,0 @@ -# $Id: PLATFORMS,v 1.27 2006-11-23 11:39:01 sendu Exp $ - -Perl general comments: - - o Perl must be 5.6.1 or higher, with perl > 5.8.1 highly recommended. - perl 5.10 also works. - - o Index.t will fail if you have an out-of-date DBM file - installation or a bad DB_File installation - - -Tested systems & OS Specific Comments or Warnings -================================================== - -Machine : Debian Linux 2.6.8-2-686-sm -Perl : 5.8.7 -Comments: none - -Machine : Gentoo Linux 2.6.16-r9 x86_64 -Perl : 5.8.8 -Comments: none - -Machine : FreeBSD 6.2-PRERELEASE i386 and FreeBSD 5.5-STABLE i386 -Perl : 5.8.8 -Comments: none - -Machine : Win32, WinNT i386, Windows XP -Perl : ActiveState Perl 5.8.8.819 -Comments: Only ActiveState Perl >= 5.8 is known to work well, unlike other - platforms that can use perl 5.6.1. - Be sure that the module DB_File is installed and up-to-date - to allow Bio::Index modules to work properly. - Installing ppm's IO-stringy and IO-String and File-Temp are - necessary as well. - - See INSTALL.WIN for more information - -Machine : MacOS -Perl : MacPerl -Comments: We don't recommend using Bioperl on MacOS 9 systems - -Machine : MacOS X 10.4.7 (Intel) and 10.4.8 -Perl : 5.8.6 -Comments: Steve Cannon has made available Bioperl OS X installation - directions and notes online at the following URL: - http://www.tc.umn.edu/~cann0010/Bioperl_OSX_install.html - Also see the Unix installation instructions at: - http://www.bioperl.org/wiki/Installing_Bioperl_for_Unix - Or install using CPAN. - -Machine : CentOS -Perl : n/a -Comments: Module::Build, required for installation using Build.PL, may - have difficulty installing. You can force install it with - CPAN. From bugzilla-daemon at portal.open-bio.org Fri Jul 10 11:19:51 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Jul 2009 11:19:51 -0400 Subject: [Bioperl-guts-l] [Bug 2878] update PLATFORMS file In-Reply-To: Message-ID: <200907101519.n6AFJp4C029642@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2878 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #2 from cjfields at bioperl.org 2009-07-10 11:19 EST ------- File removed from core in r15843. Thanks! -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From lstein at dev.open-bio.org Sat Jul 11 10:17:17 2009 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Sat, 11 Jul 2009 10:17:17 -0400 Subject: [Bioperl-guts-l] [15844] bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm: made searching for location & type more memory efficient and possibly faster Message-ID: <200907111417.n6BEHH7t010657@dev.open-bio.org> Revision: 15844 Author: lstein Date: 2009-07-11 10:17:16 -0400 (Sat, 11 Jul 2009) Log Message: ----------- made searching for location & type more memory efficient and possibly faster Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm 2009-07-10 15:14:50 UTC (rev 15843) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm 2009-07-11 14:17:16 UTC (rev 15844) @@ -1022,7 +1022,8 @@ for (my $status = $db->seq($key,$value,R_CURSOR); $status == 0 and $key =~ /^$regexp$/i; $status = $db->seq($key,$value,R_NEXT)) { - push @results,$value; + next if %$filter && !$filter->{$value}; # don't bother + push @results,$value; } $self->update_filter($filter,\@results); } @@ -1050,12 +1051,26 @@ my $key = lc "$primary_tag:$source_tag"; my $value; - for (my $status = $db->seq($key,$value,R_CURSOR); - $status == 0 && $key =~ /$match/i; - $status = $db->seq($key,$value,R_NEXT)) { - push @results,$value; + my $status = $db->seq($key,$value,R_CURSOR); # get first real key + my $count = $db->get_dup($key); + + # it will be faster to fetch each object + if (%$filter && $count > 2 * keys %$filter) { + for my $id (keys %$filter) { + my $obj = $self->_fetch($id) or next; + push @results,$id if $obj->type =~ /$match/i; + } + } + else { + for (my $status = $db->seq($key,$value,R_CURSOR); + $status == 0 && $key =~ /$match/i; + $status = $db->seq($key,$value,R_NEXT)) { + next if %$filter && !$filter->{$value}; # don't even bother + push @results,$value; + } + } } $self->update_filter($filter,\@results); } @@ -1081,6 +1096,7 @@ my $key = "\L$seq_id\E.$binstart"; my $keystop = "\L$seq_id\E.$binend"; my $value; + for (my $status = $db->seq($key,$value,R_CURSOR); $status == 0 && $key le $keystop; $status = $db->seq($key,$value,R_NEXT)) { @@ -1093,6 +1109,7 @@ elsif ($range_type eq 'contains') { next unless $fstart >= $start && $fend <= $end; } + next if %$filter && !$filter->{$id}; # don't bother push @results,$id; } } @@ -1112,6 +1129,7 @@ next if $seenit{$id}++; next if $strand && $fstrand != $strand; next unless $fstart <= $start && $fend >= $end; + next if %$filter && !$filter->{$id}; # don't bother push @results,$id; } @@ -1124,6 +1142,7 @@ next if $seenit{$id}++; next if $strand && $fstrand != $strand; next unless $fstart <= $start && $fend >= $end; + next if %$filter && !$filter->{$id}; # don't bother push @results,$id; } @@ -1161,7 +1180,8 @@ for (my $status = $db->seq($key,$value,R_CURSOR); $status == 0 && $key =~ /^$att_name:$regexp$/i; $status = $db->seq($key,$value,R_NEXT)) { - push @result,$value; + next if %$filter && !$filter->{$value}; # don't bother + push @result,$value; } } $result ||= $self->update_filter($filter,\@result); From bugzilla-daemon at portal.open-bio.org Sat Jul 11 12:04:18 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 11 Jul 2009 12:04:18 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907111604.n6BG4IR3014492@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #2 from maj at fortinbras.us 2009-07-11 12:04 EST ------- I've applied the spirit of Tristan's fix, but being careful to treat the bootstrap attribute as it is (a tag)-- so instead of setting ->bootstrap(''), which leaves the 'B' tag on the object, we ->remove_tag('B'). The "weirdness" of the tree ((C:50,(A:52,D:70):11)68:46)B; is only unfamiliarity. If you reroot *on the node B*, then that node *is* the root, following my mods to B:T:Node. See the interminable discussion at http://lists.open-bio.org/pipermail/bioperl-l/2009-February/029180.html In the old way, a fake node was created at the midpoint of the branch between B and its ancestor, and this was made the root. This is exceedingly arbitrary, when you consider that "rootiness" is a property of a node and not a branch. It is only a convenience to say "root on this branch" -- this means "create a node in this branch and make that node the new root". The new method B:T:Node::create_node_on_branch creates the node, which then can be made the new root. The new process allowed the elimination of a bug or two (possibly a manifestation of the present bugs, but fixed some others found in the process; see http://bugzilla.bioperl.org/show_bug.cgi?id=2456) In the new tests associated with this bug, there is a demo of this whole process. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From maj at dev.open-bio.org Sat Jul 11 12:05:17 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Sat, 11 Jul 2009 12:05:17 -0400 Subject: [Bioperl-guts-l] [15845] bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm: bug 2877, a la Tristan -- thanks! Message-ID: <200907111605.n6BG5HTx010782@dev.open-bio.org> Revision: 15845 Author: maj Date: 2009-07-11 12:05:17 -0400 (Sat, 11 Jul 2009) Log Message: ----------- bug 2877, a la Tristan -- thanks! Modified Paths: -------------- bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm Modified: bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm =================================================================== --- bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm 2009-07-11 14:17:16 UTC (rev 15844) +++ bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm 2009-07-11 16:05:17 UTC (rev 15845) @@ -959,11 +959,14 @@ my $next = $path_from_oldroot[$i + 1]; $current->remove_Descendent($next); $current->branch_length($next->branch_length); + $current->bootstrap($next->bootstrap) if defined $next->bootstrap; + $next->remove_tag('B'); $next->add_Descendent($current); } $new_root->add_Descendent($former_anc); $tmp_node->remove_Descendent($former_anc); + $tmp_node = undef; $new_root->branch_length(undef); From maj at dev.open-bio.org Sat Jul 11 12:24:02 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Sat, 11 Jul 2009 12:24:02 -0400 Subject: [Bioperl-guts-l] [15846] bioperl-live/trunk/t/Tree/Node.t: tests for bug 2877 Message-ID: <200907111624.n6BGO28a010933@dev.open-bio.org> Revision: 15846 Author: maj Date: 2009-07-11 12:24:01 -0400 (Sat, 11 Jul 2009) Log Message: ----------- tests for bug 2877 Modified Paths: -------------- bioperl-live/trunk/t/Tree/Node.t Modified: bioperl-live/trunk/t/Tree/Node.t =================================================================== --- bioperl-live/trunk/t/Tree/Node.t 2009-07-11 16:05:17 UTC (rev 15845) +++ bioperl-live/trunk/t/Tree/Node.t 2009-07-11 16:24:01 UTC (rev 15846) @@ -2,15 +2,19 @@ # $Id$ use strict; +use vars qw($GOT_SEEKABLE); BEGIN { use lib '.'; use Bio::Root::Test; - - test_begin(-tests => 34); + use File::Temp qw(tempfile); + $GOT_SEEKABLE = 0; + $GOT_SEEKABLE ||= eval "require IO::Seekable; 1"; + test_begin(-tests => 40); use_ok('Bio::Tree::Node'); use_ok('Bio::Tree::AlleleNode'); + use_ok('Bio::TreeIO'); } my $node1 = Bio::Tree::Node->new(); @@ -77,3 +81,61 @@ my ($a2,$a3) = $allele_node->get_Genotypes(-marker => 'm4')->get_Alleles; is($a2, 0); is($a3, 4); +# bug 2877 +SKIP : { + skip 'IO::Seekable reqd', 5 unless $GOT_SEEKABLE; + package IO::Handle; + our @ISA; + push @ISA, 'IO::Seekable'; + package main; + my ($tf,$tfn) = tempfile(); + open my $f, ">$tfn"; + print $f "(A:52,(B:46,C:50):11,D:70)68\n"; + close $f; + my $in = Bio::TreeIO->new(-format => 'newick', + -file => $tfn, + -internal_node_id => 'bootstrap'); + my ($outf,$outfn) = tempfile(); + open my $of, "+>".$outfn; + + my $out = Bio::TreeIO->new(-format => 'newick', + -fh => $of); + + while( my $t = $in->next_tree ){ + my $s; + my $old_root = $t->get_root_node(); + my ($b) = $t->find_node(-id =>"B"); + my $b_anc = $b->ancestor; + + my $r = $b->create_node_on_branch(-FRACTION=>0.5); + $r->id('fake'); + # before reroot + $out->write_tree($t); + $of->seek(0,0); + is( <$of>, "(A:52,(C:50,(B:23)fake:23):11,D:70)68;\n", 'with fake node'); + # after reroot + $t->reroot($r); + + $out->write_tree($t); + $of->seek(0,0); + is( <$of>, "(A:52,(C:50,(B:23)fake:23):11,D:70)68;\n", "after reroot on fake node"); + $t->reroot($b); + $of->seek(0,0); + $out->write_tree($t); + $of->seek(0,0); + is( <$of>, "(((C:50,(A:52,D:70)68:11):23)fake:23)B;\n", "reroot on B"); + + $t->reroot($b_anc); + $t->splice(-remove_id=>'fake'); + $of->seek(0,0); + $out->write_tree($t); + $of->seek(0,0); + is( <$of>, "(B:23,C:50,(A:52,D:70)68:11);\n", "remove fake node, reroot on former B anc"); + $t->reroot($old_root); + $of->seek(0,0); + $out->write_tree($t); + $of->seek(0,0); + is( <$of> ,"(A:52,(B:23,C:50):11,D:70)68;\n", "roundtrip"); + $of->close; + } +} From bugzilla-daemon at portal.open-bio.org Sat Jul 11 12:25:50 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 11 Jul 2009 12:25:50 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907111625.n6BGPoMC015465@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 maj at fortinbras.us changed: What |Removed |Added ---------------------------------------------------------------------------- Status|ASSIGNED |RESOLVED Resolution| |FIXED ------- Comment #3 from maj at fortinbras.us 2009-07-11 12:25 EST ------- commited fix at r15845, tests at r15846 -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From maj at dev.open-bio.org Sat Jul 11 15:32:55 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Sat, 11 Jul 2009 15:32:55 -0400 Subject: [Bioperl-guts-l] [15847] bioperl-live/trunk/Bio/Tree/Tree.pm: adding convenience function ' as_text', Message-ID: <200907111932.n6BJWtwg011225@dev.open-bio.org> Revision: 15847 Author: maj Date: 2009-07-11 15:32:55 -0400 (Sat, 11 Jul 2009) Log Message: ----------- adding convenience function 'as_text', will deliver tree as a string in desired format (directly computed by the appropriate Bio::TreeIO modules) Modified Paths: -------------- bioperl-live/trunk/Bio/Tree/Tree.pm Modified: bioperl-live/trunk/Bio/Tree/Tree.pm =================================================================== --- bioperl-live/trunk/Bio/Tree/Tree.pm 2009-07-11 16:24:01 UTC (rev 15846) +++ bioperl-live/trunk/Bio/Tree/Tree.pm 2009-07-11 19:32:55 UTC (rev 15847) @@ -89,6 +89,7 @@ Aaron Mackey amackey at virginia.edu Sendu Bala bix at sendu.me.uk +Mark A. Jensen maj at fortinbras.us =head1 APPENDIX @@ -357,6 +358,61 @@ =cut +=head2 as_text + + Title : as_text + Usage : my $tree_as_string = $tree->as_text($format) + Function: Returns the tree as a string representation in the + desired format (currently 'newick', 'nhx', or + 'tabtree') + Returns : scalar string + Args : format type as specified by Bio::TreeIO + Note : This method loads the Bio::TreeIO::$format module + on the fly, and commandeers the _write_tree_Helper + routine therein to create the tree string. + +=cut + +sub as_text { + my $self = shift; + my $format = shift; + my @parms; + my $iomod = "Bio::TreeIO::$format"; + $self->_load_module($iomod); + # following currently not really necessary, but who knows? + my $io = $iomod->new(-format=>$format, -file=>File::Spec->devnull()); + no strict "refs"; + my $iowtH = *{$iomod."::_write_tree_Helper"}{CODE}; + use strict "refs"; + for ($format) { + /newick/ && do { + @parms = ( $io->bootstrap_style, $io->order_by, 0 ); + last; + }; + /nhx/ && do { + @parms = ( 0 ); + last; + }; + /tabtree/ && do { + @parms = ( "" ); + last; + }; + # default + $self->throw("as_text does not allow format '$format'") + } + + + # newline_each_node... + my $data = [$iowtH->($self->get_root_node, @parms)]; + + if ($format eq 'tabtree') { + return $$data[0]."\n"; + } + else { + return join(",", @$data).";\n"; + } +} + =head2 Methods for associating Tag/Values with a Tree These methods associate tag/value pairs with a Tree From maj at dev.open-bio.org Sat Jul 11 15:33:45 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Sat, 11 Jul 2009 15:33:45 -0400 Subject: [Bioperl-guts-l] [15848] bioperl-live/trunk/t/Tree/Node.t: use new as_text() method, rather than kludgy Message-ID: <200907111933.n6BJXjl0011256@dev.open-bio.org> Revision: 15848 Author: maj Date: 2009-07-11 15:33:44 -0400 (Sat, 11 Jul 2009) Log Message: ----------- use new as_text() method, rather than kludgy temp out files Modified Paths: -------------- bioperl-live/trunk/t/Tree/Node.t Modified: bioperl-live/trunk/t/Tree/Node.t =================================================================== --- bioperl-live/trunk/t/Tree/Node.t 2009-07-11 19:32:55 UTC (rev 15847) +++ bioperl-live/trunk/t/Tree/Node.t 2009-07-11 19:33:44 UTC (rev 15848) @@ -2,16 +2,12 @@ # $Id$ use strict; -use vars qw($GOT_SEEKABLE); BEGIN { use lib '.'; use Bio::Root::Test; use File::Temp qw(tempfile); - $GOT_SEEKABLE = 0; - $GOT_SEEKABLE ||= eval "require IO::Seekable; 1"; test_begin(-tests => 40); - use_ok('Bio::Tree::Node'); use_ok('Bio::Tree::AlleleNode'); use_ok('Bio::TreeIO'); @@ -82,25 +78,13 @@ is($a2, 0); is($a3, 4); # bug 2877 -SKIP : { - skip 'IO::Seekable reqd', 5 unless $GOT_SEEKABLE; - package IO::Handle; - our @ISA; - push @ISA, 'IO::Seekable'; - package main; - my ($tf,$tfn) = tempfile(); - open my $f, ">$tfn"; - print $f "(A:52,(B:46,C:50):11,D:70)68\n"; - close $f; - my $in = Bio::TreeIO->new(-format => 'newick', - -file => $tfn, - -internal_node_id => 'bootstrap'); - my ($outf,$outfn) = tempfile(); - open my $of, "+>".$outfn; - - my $out = Bio::TreeIO->new(-format => 'newick', - -fh => $of); - +my ($tf,$tfn) = tempfile(); +open my $f, ">$tfn"; +print $f "(A:52,(B:46,C:50):11,D:70)68\n"; +close $f; +my $in = Bio::TreeIO->new(-format => 'newick', + -file => $tfn, + -internal_node_id => 'bootstrap'); while( my $t = $in->next_tree ){ my $s; my $old_root = $t->get_root_node(); @@ -110,32 +94,20 @@ my $r = $b->create_node_on_branch(-FRACTION=>0.5); $r->id('fake'); # before reroot - $out->write_tree($t); - $of->seek(0,0); - is( <$of>, "(A:52,(C:50,(B:23)fake:23):11,D:70)68;\n", 'with fake node'); + is( $t->as_text('newick'), "(A:52,(C:50,(B:23)fake:23):11,D:70)68;\n", 'with fake node'); # after reroot $t->reroot($r); - - $out->write_tree($t); - $of->seek(0,0); - is( <$of>, "(A:52,(C:50,(B:23)fake:23):11,D:70)68;\n", "after reroot on fake node"); + is( $t->as_text('newick'), "(B:23,(C:50,(A:52,D:70)68:11):23)fake;\n", "after reroot on fake node"); $t->reroot($b); - $of->seek(0,0); - $out->write_tree($t); - $of->seek(0,0); - is( <$of>, "(((C:50,(A:52,D:70)68:11):23)fake:23)B;\n", "reroot on B"); + + is( $t->as_text('newick'), "(((C:50,(A:52,D:70)68:11):23)fake:23)B;\n", "reroot on B"); $t->reroot($b_anc); $t->splice(-remove_id=>'fake'); - $of->seek(0,0); - $out->write_tree($t); - $of->seek(0,0); - is( <$of>, "(B:23,C:50,(A:52,D:70)68:11);\n", "remove fake node, reroot on former B anc"); + + is( $t->as_text('newick'), "(B:23,C:50,(A:52,D:70)68:11);\n", "remove fake node, reroot on former B anc"); $t->reroot($old_root); - $of->seek(0,0); - $out->write_tree($t); - $of->seek(0,0); - is( <$of> ,"(A:52,(B:23,C:50):11,D:70)68;\n", "roundtrip"); - $of->close; - } + is( $t->as_text('newick') ,"(A:52,(B:23,C:50):11,D:70)68;\n", "roundtrip"); + } + From scain at dev.open-bio.org Mon Jul 13 11:18:06 2009 From: scain at dev.open-bio.org (Scott Cain) Date: Mon, 13 Jul 2009 11:18:06 -0400 Subject: [Bioperl-guts-l] [15849] bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI: lengthening the typelist. tag field to accomidate longer SO terms with long source fields Message-ID: <200907131518.n6DFI6KG018690@dev.open-bio.org> Revision: 15849 Author: scain Date: 2009-07-13 11:18:05 -0400 (Mon, 13 Jul 2009) Log Message: ----------- lengthening the typelist.tag field to accomidate longer SO terms with long source fields Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm 2009-07-11 19:33:44 UTC (rev 15848) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm 2009-07-13 15:18:05 UTC (rev 15849) @@ -268,7 +268,7 @@ typelist => < < < Revision: 15850 Author: lstein Date: 2009-07-13 15:00:40 -0400 (Mon, 13 Jul 2009) Log Message: ----------- made berkeley adaptor backward compatible with databases created by earlier versions Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm 2009-07-13 15:18:05 UTC (rev 15849) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm 2009-07-13 19:00:40 UTC (rev 15850) @@ -18,6 +18,8 @@ use constant MININT => -999_999_999_999; use constant MAXINT => 999_999_999_999; +our $VERSION = '2.00'; + =head1 NAME Bio::DB::SeqFeature::Store::berkeleydb -- Storage and retrieval of sequence annotation data in Berkeleydb files @@ -186,6 +188,10 @@ -create Pass true to create the index files if they don't exist (implies -write=>1) + -locking Use advisory locking to avoid one process trying to read + from the database while another is updating it (may not + work properly over NFS). + -temp Pass true to create temporary index files that will be deleted once the script exits. @@ -231,6 +237,10 @@ B<-autoindex> is an alias for B<-dir>. +You should specify B<-locking> in a multiuser environment, including +the case in which the database is being used by a web server at the +same time another user might be updating it. + =back See L for all the access methods supported @@ -252,12 +262,14 @@ $write, $create, $verbose, - ) = rearrange([['DSN','DB'], - [qw(DIR AUTOINDEX)], - ['TMP','TEMP','TEMPORARY'], - [qw(WRITE WRITABLE)], - 'CREATE', - 'VERBOSE' + $locking, + ) = rearrange([['DSN','DB'], + [qw(DIR AUTOINDEX)], + ['TMP','TEMP','TEMPORARY'], + [qw(WRITE WRITABLE)], + 'CREATE', + 'VERBOSE', + [qw(LOCK LOCKING)], ], at _); $verbose = 1 unless defined $verbose; @@ -289,6 +301,7 @@ $self->directory($directory); $self->temporary($is_temporary); $self->verbose($verbose); + $self->locking($locking); $self->_delete_databases() if $create; if ($autoindex && -d $autoindex) { $self->auto_reindex($autoindex); @@ -475,7 +488,6 @@ my $mtime = _mtime(\*_); # not a typo $maxtime = $mtime if $mtime > $maxtime; push @gff3,$path; -# push @gff3,$path if $mtime > $timestamp_time; } @@ -483,7 +495,6 @@ my $mtime = _mtime(\*_); # not a typo $maxtime = $mtime if $mtime > $maxtime; push @fff,$path; -# push @fff,$path if $mtime > $timestamp_time; } elsif ($path =~ /\.wig$/i) { @@ -505,7 +516,6 @@ closedir $D; $result->{gff} = \@gff3 if $maxtime > $timestamp_time; -# $result->{gff} = \@gff3 if @gff3; $result->{wig} = \@wig if @wig; $result->{fff} = \@fff if @fff; $result->{fasta}++ if $fasta; @@ -519,6 +529,13 @@ return $d; } +sub locking { + my $self = shift; + my $d = $self->{locking}; + $self->{locking} = shift if @_; + return $d; +} + sub lockfile { my $self = shift; return File::Spec->catfile($self->directory,'lock'); @@ -527,12 +544,13 @@ sub lock { my $self = shift; my $mode = shift; + return unless $self->locking; my $flag = $mode eq 'exclusive' ? LOCK_EX : LOCK_SH; my $lockfile = $self->lockfile; my $fh = $self->_flock_fh; unless ($fh) { - my $open = -e $lockfile ? '<' : '>'; + my $open = -e $lockfile ? '<' : '>'; $fh = IO::File->new($lockfile,$open) or die "Cannot open $lockfile: $!"; } flock($fh,$flag); @@ -541,10 +559,11 @@ sub unlock { my $self = shift; + return unless $self->locking; + my $fh = $self->_flock_fh or return; flock($fh,LOCK_UN); undef $self->{flock_fh}; - unlink $self->lockfile; } sub _flock_fh { @@ -580,6 +599,7 @@ if ($create) { %h = (); $h{'.next_id'} = 1; + $h{'.version'} = $VERSION; } $self->db(\%h); @@ -974,7 +994,7 @@ my @result; unless (defined $name or defined $seq_id or defined $types or defined $attributes) { - @result = grep {$_ ne '.next_id' } keys %{$self->db}; + @result = grep {!/^\./} keys %{$self->db}; } my %found = (); @@ -1051,11 +1071,9 @@ my $key = lc "$primary_tag:$source_tag"; my $value; - my $status = $db->seq($key,$value,R_CURSOR); # get first real key - my $count = $db->get_dup($key); - - # it will be faster to fetch each object - if (%$filter && $count > 2 * keys %$filter) { + # If filter is already provided, then it is usually faster to + # fetch each object. + if (%$filter) { for my $id (keys %$filter) { my $obj = $self->_fetch($id) or next; push @results,$id if $obj->type =~ /$match/i; @@ -1091,10 +1109,11 @@ $start = MININT if !defined $start; $end = MAXINT if !defined $end; + my $version_2 = $self->version > 1; if ($range_type eq 'overlaps' or $range_type eq 'contains') { - my $key = "\L$seq_id\E.$binstart"; - my $keystop = "\L$seq_id\E.$binend"; + my $key = $version_2 ? "\L$seq_id\E.$binstart" : "\L$seq_id\E$binstart"; + my $keystop = $version_2 ? "\L$seq_id\E.$binend" : "\L$seq_id\E$binend"; my $value; for (my $status = $db->seq($key,$value,R_CURSOR); @@ -1117,8 +1136,8 @@ # for contained in, we look for features originating and terminating outside the specified range # this is incredibly inefficient, but fortunately the query is rare (?) elsif ($range_type eq 'contained_in') { - my $key = "\L$seq_id."; - my $keystop = "\L$seq_id\E.$binstart"; + my $key = $version_2 ? "\L$seq_id." : "\L$seq_id"; + my $keystop = $version_2 ? "\L$seq_id\E.$binstart" : "\L$seq_id\E$binstart"; my $value; # do the left part of the range @@ -1348,7 +1367,13 @@ } } +sub version { + my $self = shift; + my $db = $self->db; + return $db->{'.version'} || 1.00; +} + sub DESTROY { my $self = shift; $self->_close_databases(); From bugzilla-daemon at portal.open-bio.org Mon Jul 13 16:36:25 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 13 Jul 2009 16:36:25 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907132036.n6DKaPBm015799@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 tristan.lefebure at gmail.com changed: What |Removed |Added ---------------------------------------------------------------------------- Status|RESOLVED |REOPENED Resolution|FIXED | ------- Comment #4 from tristan.lefebure at gmail.com 2009-07-13 16:36 EST ------- Well, I'm still seeing the same bug: #! /usr/bin/perl use strict; use warnings; use Bio::TreeIO; my $in = Bio::TreeIO->new(-format => 'newick', -fh => \*DATA, -internal_node_id => 'bootstrap'); my $out = Bio::TreeIO->new(-format => 'newick', -file => ">out.tree"); while( my $t = $in->next_tree ){ my $old_root = $t->get_root_node(); my ($b) = $t->find_node(-id =>"B"); my $b_anc = $b->ancestor; $out->write_tree($t); $t->reroot($b_anc); $out->write_tree($t); } __DATA__ (A:52,(B:46,C:50)68:11,D:70); This produces: (A:52,(B:46,C:50)68:11,D:70); (B:46,C:50,(A:52,D:70):11)68; Some comments: - in t/Tree/Node.t, the test tree is: (A:52,(B:46,C:50):11,D:70)68 instead of: (A:52,(B:46,C:50)68:11,D:70) - If you manipulate the tree before rooting it with $b_anc, the bug disappears. For example if you do: $t->reroot($b); $out->write_tree($t); $t->reroot($b_anc); $out->write_tree($t); you get: (A:52,(B:46,C:50)68:11,D:70); ((C:50,(A:52,D:70)68:11):46)B; (B:46,C:50,(A:52,D:70)68:11); Hugh? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Jul 13 17:03:09 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 13 Jul 2009 17:03:09 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907132103.n6DL39p8016750@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #5 from maj at fortinbras.us 2009-07-13 17:03 EST ------- (In reply to comment #4) the tree is D A B C \ \ \ | \ \ \ | \ \ \ | \ \ \ | \ \ \| \ \ +g \ \ | \ \ | Well, I'm still seeing the same bug: > > #! /usr/bin/perl > > use strict; > use warnings; > use Bio::TreeIO; > > > my $in = Bio::TreeIO->new(-format => 'newick', > -fh => \*DATA, > -internal_node_id => 'bootstrap'); > > my $out = Bio::TreeIO->new(-format => 'newick', -file => ">out.tree"); > > while( my $t = $in->next_tree ){ > my $old_root = $t->get_root_node(); > my ($b) = $t->find_node(-id =>"B"); > my $b_anc = $b->ancestor; > $out->write_tree($t); > > $t->reroot($b_anc); > $out->write_tree($t); > > > } > > > __DATA__ > (A:52,(B:46,C:50)68:11,D:70); > > > > This produces: > > (A:52,(B:46,C:50)68:11,D:70); > (B:46,C:50,(A:52,D:70):11)68; > > Some comments: > > - in t/Tree/Node.t, the test tree is: > (A:52,(B:46,C:50):11,D:70)68 > instead of: > (A:52,(B:46,C:50)68:11,D:70) > > - If you manipulate the tree before rooting it with $b_anc, the bug disappears. > For example if you do: > > $t->reroot($b); > $out->write_tree($t); > > $t->reroot($b_anc); > $out->write_tree($t); > > you get: > > (A:52,(B:46,C:50)68:11,D:70); > ((C:50,(A:52,D:70)68:11):46)B; > (B:46,C:50,(A:52,D:70)68:11); > > > > Hugh? > -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Jul 13 17:37:37 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 13 Jul 2009 17:37:37 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907132137.n6DLbba4017958@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #6 from tristan.lefebure at gmail.com 2009-07-13 17:37 EST ------- (In reply to comment #5) I agree that (A:52,(B:46,C:50)68:11,D:70) is equivalent to (B:46,C:50,(A:52,D:70)68:11). Where I have some trouble is with (B:46,C:50,(A:52,D:70):11)68, which, I believe (and figtree seems to read it the same way), attach the bootstrap value to the root of the tree (which does not make sense). In other words: (A:52,(B:46,C:50)68:11,D:70) -----------------A | -| | 68 -------B |-------| | | | -------C | | ---------------D (B:46,C:50,(A:52,D:70):11)68; -----------------A | | 68 |---------------D --| | -------B |-------| | -------C look different to me. Am I missing something here? Thanks, > (In reply to comment #4) > > the tree is > D A B C > \ \ \ | > \ \ \ | > \ \ \ | > \ \ \ | > \ \ \| > \ \ +g > \ \ | > \ \ | \ \ | > \ \| > \----+h > > So, as you pointed out, if the bootstrap is a property of the branch, then > the representations are equivalent, whether the value is associated with > node g (as in (A:52,(B:46,C:50)68:11,D:70); ) or node h ( as in > (B:46,C:50,(A:52,D:70)68:11); ). The lengths are unchanged and correct. > > If you want a canonical form of some kind, then we have to think harder, but > the bug as far as I can see is not there anymore. > > > > Well, I'm still seeing the same bug: > > > > #! /usr/bin/perl > > > > use strict; > > use warnings; > > use Bio::TreeIO; > > > > > > my $in = Bio::TreeIO->new(-format => 'newick', > > -fh => \*DATA, > > -internal_node_id => 'bootstrap'); > > > > my $out = Bio::TreeIO->new(-format => 'newick', -file => ">out.tree"); > > > > while( my $t = $in->next_tree ){ > > my $old_root = $t->get_root_node(); > > my ($b) = $t->find_node(-id =>"B"); > > my $b_anc = $b->ancestor; > > $out->write_tree($t); > > > > $t->reroot($b_anc); > > $out->write_tree($t); > > > > > > } > > > > > > __DATA__ > > (A:52,(B:46,C:50)68:11,D:70); > > > > > > > > This produces: > > > > (A:52,(B:46,C:50)68:11,D:70); > > (B:46,C:50,(A:52,D:70):11)68; > > > > Some comments: > > > > - in t/Tree/Node.t, the test tree is: > > (A:52,(B:46,C:50):11,D:70)68 > > instead of: > > (A:52,(B:46,C:50)68:11,D:70) > > > > - If you manipulate the tree before rooting it with $b_anc, the bug disappears. > > For example if you do: > > > > $t->reroot($b); > > $out->write_tree($t); > > > > $t->reroot($b_anc); > > $out->write_tree($t); > > > > you get: > > > > (A:52,(B:46,C:50)68:11,D:70); > > ((C:50,(A:52,D:70)68:11):46)B; > > (B:46,C:50,(A:52,D:70)68:11); > > > > > > > > Hugh? > > > -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Jul 13 23:43:32 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 13 Jul 2009 23:43:32 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907140343.n6E3hWkp028670@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #7 from maj at fortinbras.us 2009-07-13 23:43 EST ------- I see your point. If you saw a tree marked like that in a paper, you'd say "Whaa?" If the tree in a paper were unrooted, one would say, "Ah, the partition defined by this branch has 68% 'support'" or whatever. Now, in the script we've just been playing with the root, saying "let's pick up the tree here, and let the rest hang down", and when we flatten the tree to a linear text representation, the root gets spread out over the whole text, viz. as parentheses at both ends, with the bootstrap value hanging off the side. This is ugly, but still not invalid I think. But I'm not trying to get out of any work-- What we can do is modify the algorithm to say "if a bootstrap value gets shoved to the root during rerooting, shove it back to the node at the other end of the branch." That would give us a canonical form that's less off-putting...sound good? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From chmille4 at dev.open-bio.org Tue Jul 14 10:39:55 2009 From: chmille4 at dev.open-bio.org (Chase Miller) Date: Tue, 14 Jul 2009 10:39:55 -0400 Subject: [Bioperl-guts-l] [15851] bioperl-dev/trunk/t: Added More test for nexml, focusing on tests for writing a complete nexml document Message-ID: <200907141439.n6EEdtgu021671@dev.open-bio.org> Revision: 15851 Author: chmille4 Date: 2009-07-14 10:39:53 -0400 (Tue, 14 Jul 2009) Log Message: ----------- Added More test for nexml, focusing on tests for writing a complete nexml document Modified Paths: -------------- bioperl-dev/trunk/t/AlignIO/nexml.t bioperl-dev/trunk/t/SeqIO/nexml.t bioperl-dev/trunk/t/TreeIO/nexml.t bioperl-dev/trunk/t/nexml.t Modified: bioperl-dev/trunk/t/AlignIO/nexml.t =================================================================== --- bioperl-dev/trunk/t/AlignIO/nexml.t 2009-07-13 19:00:40 UTC (rev 15850) +++ bioperl-dev/trunk/t/AlignIO/nexml.t 2009-07-14 14:39:53 UTC (rev 15851) @@ -9,7 +9,7 @@ # this passes if $object gets defined without throws by the constructor # use when droppeded into bioperl - ok( my $inAlnStream = Bio::AlignIO->new(-file => test_input_file("characters.nexml.xml"), -format => 'nexml')); + ok( my $inAlnStream = Bio::AlignIO->new(-file => test_input_file("../../code/data_sets/characters.nexml.xml"), -format => 'nexml')); @@ -31,18 +31,19 @@ #tests for writing nexml alignments - ok( my $outAlnStream = Bio::AlignIO->new(-file => test_output_file('>charactersOut.xml'), -format => 'nexml'), 'Begin Tests for writing files'); + ok( my $outAlnStream = Bio::AlignIO->new(-file => test_output_data('>../../code/data_sets/charactersOut.xml'), -format => 'nexml'), 'Begin Tests for writing files');; - #still fails here need to fix this and link taxa ok( $outAlnStream->write_aln($aln_obj)); - ok( $aln_obj = $outAlnStream->next_aln() ); - isa_ok($aln_obj, 'Bio::SimpleAlign'); - is ($aln_obj->id, 'DNA sequences', "id"); + ok( my $inAlnStream2 = Bio::AlignIO->new(-file => test_input_data('../../code/data_sets/charactersOut.xml'), -format => 'nexml'), 'Begin Tests for writing files');; + + ok( my $aln_obj2 = $inAlnStream2->next_aln() ); + isa_ok($aln_obj2, 'Bio::SimpleAlign'); + is ($aln_obj2->id, 'DNA sequences', "id"); $num =0; @expected_seqs = ('ACGCTCGCATCGCATC', 'ACGCTCGCATCGCATT', 'ACGCTCGCATCGCATG'); #checking sequence objects - foreach my $seq_obj ($aln_obj->each_seq()) { + foreach my $seq_obj ($aln_obj2->each_seq()) { $num++; is( $seq_obj->alphabet, 'dna', "alphabet" ); Modified: bioperl-dev/trunk/t/SeqIO/nexml.t =================================================================== --- bioperl-dev/trunk/t/SeqIO/nexml.t 2009-07-13 19:00:40 UTC (rev 15850) +++ bioperl-dev/trunk/t/SeqIO/nexml.t 2009-07-14 14:39:53 UTC (rev 15851) @@ -11,7 +11,7 @@ # this passes if $object gets defined without throws by the constructor # use when droppeded into bioperl - ok( my $SeqStream = Bio::SeqIO->new(-file => test_input_file("characters.nexml.xml"), -format => 'nexml')); + ok( my $SeqStream = Bio::SeqIO->new(-file => test_input_file("../../code/data_sets/characters.nexml.xml"), -format => 'nexml')); @@ -45,10 +45,10 @@ #Start tests for writing to a file - ok( my $outSeqStream = Bio::SeqIO->new(-file => test_output_file('>charactersSeqsOut.xml'), -format => 'nexml'), 'Begin Tests for writing tree files');; + ok( my $outSeqStream = Bio::SeqIO->new(-file => test_output_data('>../../code/data_sets/charactersSeqsOut.xml'), -format => 'nexml'), 'Begin Tests for writing tree files');; ok( $outSeqStream->write_seq($seq_obj)); - my $inSeqStream = Bio::SeqIO->new(-file => test_input_file('charactersSeqsOut.xml'), -format => 'nexml'); + my $inSeqStream = Bio::SeqIO->new(-file => test_input_data('../../code/data_sets/charactersSeqsOut.xml'), -format => 'nexml'); #TODO when writing multiple seqs works this will be useful =head not done yet Modified: bioperl-dev/trunk/t/TreeIO/nexml.t =================================================================== --- bioperl-dev/trunk/t/TreeIO/nexml.t 2009-07-13 19:00:40 UTC (rev 15850) +++ bioperl-dev/trunk/t/TreeIO/nexml.t 2009-07-14 14:39:53 UTC (rev 15851) @@ -11,7 +11,7 @@ # this passes if $object gets defined without throws by the constructor - ok( my $TreeStream = Bio::TreeIO->new(-file => test_input_data('trees.nexml.xml'), -format => 'Nexml') ); + ok( my $TreeStream = Bio::TreeIO->new(-file => test_input_file('../../code/data_sets/trees.nexml.xml'), -format => 'Nexml') ); @@ -49,11 +49,12 @@ #Checking ability to write files - ok( my $outTreeStream = Bio::TreeIO->new(-file => test_output_data('>treesOut.xml'), -format => 'nexml'), 'Begin Tests for writing tree files');; + ok( my $outTreeStream = Bio::TreeIO->new(-file => test_output_data('>../../code/data_sets/treesOut.xml'), -format => 'nexml'), 'Begin Tests for writing tree files'); ok( $outTreeStream->write_tree($tree_obj)); - my $inTreeStream = Bio::TreeIO->new(-file => test_input_data('treesOut.xml'), -format => 'nexml'); + my $inTreeStream = Bio::TreeIO->new(-file => test_input_data('../../code/data_sets/treesOut.xml'), -format => 'nexml'); + #checking first tree object ok($tree_obj = $inTreeStream->next_tree() ); isa_ok($tree_obj, 'Bio::Tree::Tree'); Modified: bioperl-dev/trunk/t/nexml.t =================================================================== --- bioperl-dev/trunk/t/nexml.t 2009-07-13 19:00:40 UTC (rev 15850) +++ bioperl-dev/trunk/t/nexml.t 2009-07-14 14:39:53 UTC (rev 15851) @@ -8,18 +8,66 @@ use Bio::Nexml; -use_ok('Bio::TreeIO::nexml'); # checks that your module is there and loads ok +use_ok('Bio::Nexml'); # checks that your module is there and loads ok # this passes if $object gets defined without throws by the constructor - ok( my $TreeStream = Bio::TreeIO->new(-file => test_input_data('trees.nexml.xml'), -format => 'Nexml') ); + ok( my $TreeStream = Bio::TreeIO->new(-file => test_input_data('../code/data_sets/trees.nexml.xml'), -format => 'Nexml') ); + ok( my $AlnStream = Bio::AlignIO->new(-file => test_input_data('../code/data_sets/characters.nexml.xml'), -format => 'Nexml')); - #load tree - ok( my $tree_obj = $TreeStream->next_tree() ); - my $tree_obj1 = $TreeStream->next_tree(); - isa_ok($tree_obj, 'Bio::Tree::Tree'); + #load tree + ok( my $tree_obj1 = $TreeStream->next_tree() ); + my $tree_obj2 = $TreeStream->next_tree(); + isa_ok($tree_obj1, 'Bio::Tree::Tree'); + isa_ok($tree_obj2, 'Bio::Tree::Tree'); - my $nexml_doc = Bio::Nexml->new(-file => test_output_data('>out_nexml.doc'), -format => 'Nexml'); + my @trees; + push @trees, $tree_obj1; + push @trees, $tree_obj2; - #in progress - #$nexml_doc->write(-trees => \@trees); \ No newline at end of file + #load aln + ok (my $aln_obj1 = $AlnStream->next_aln() ); + my $aln_obj2 = $AlnStream->next_aln(); + + my @alns; + push @alns, $aln_obj1; + push @alns, $aln_obj2; + + my $nexml_doc = Bio::Nexml->new(-file => test_output_data('>../code/data_sets/out_nexml_doc.xml'), -format => 'Nexml'); + + + ok( $nexml_doc->write_doc(-trees => \@trees, -alns => \@alns) ); + + my $in_nexml_doc = Bio::Nexml->new(-file => test_input_data('../code/data_sets/out_nexml_doc.xml'), -format => 'Nexml'); + + ok ( my $bptree1 = $in_nexml_doc->next_tree() ); + + isa_ok($bptree1, 'Bio::Tree::Tree'); + is( $bptree1->get_root_node()->id(), 'n1', "root node"); + my @nodes = $bptree1->get_nodes(); + is( @nodes, 9, "number of nodes"); + ok ( my $node7 = $bptree1->find_node('n7') ); + is( $node7->branch_length, 0.3247, "branch length"); + is( $node7->ancestor->id, 'n3'); + is( $node7->ancestor->branch_length, '0.34534'); + + #Check leaf nodes and taxa + my %expected_leaves = ( + 'n8' => 'bird', + 'n9' => 'worm', + 'n5' => 'dog', + 'n6' => 'mouse', + 'n2' => 'human' + ); + + ok( my @leaves = $bptree1->get_leaf_nodes() ); + is( @leaves, 5, "number of leaf nodes"); + foreach my $leaf (@leaves) + { + my $leafID = $leaf->id(); + ok( exists $expected_leaves{$leaf->id()}, "$leafID exists" ); + is( $leaf->get_tag_values('taxon'), $expected_leaves{$leaf->id()}, "$leafID taxon"); + } + + my $bptree2 = $in_nexml_doc->next_tree(); + \ No newline at end of file From chmille4 at dev.open-bio.org Tue Jul 14 10:40:53 2009 From: chmille4 at dev.open-bio.org (Chase Miller) Date: Tue, 14 Jul 2009 10:40:53 -0400 Subject: [Bioperl-guts-l] [15852] bioperl-dev/trunk/Bio: Added more robust handling of writing nexml, specifically complete nexml documents. Message-ID: <200907141440.n6EEerwk021742@dev.open-bio.org> Revision: 15852 Author: chmille4 Date: 2009-07-14 10:40:52 -0400 (Tue, 14 Jul 2009) Log Message: ----------- Added more robust handling of writing nexml, specifically complete nexml documents. Added better handling of linking taxa data to matrices and trees Modified Paths: -------------- bioperl-dev/trunk/Bio/AlignIO/nexml.pm bioperl-dev/trunk/Bio/Nexml/Util.pm bioperl-dev/trunk/Bio/Nexml.pm bioperl-dev/trunk/Bio/SeqIO/nexml.pm bioperl-dev/trunk/Bio/TreeIO/nexml.pm Modified: bioperl-dev/trunk/Bio/AlignIO/nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/AlignIO/nexml.pm 2009-07-14 14:39:53 UTC (rev 15851) +++ bioperl-dev/trunk/Bio/AlignIO/nexml.pm 2009-07-14 14:40:52 UTC (rev 15852) @@ -128,8 +128,20 @@ =cut sub write_aln { + my $self = shift(@_); + my ($matrix, $taxa) = Bio::Nexml::Util->create_bphylo_aln(@_); + $matrix->set_taxa($taxa); - return (Bio::Nexml::Util->write_aln(@_)); + my $matrices = Bio::Phylo::Matrices->new(); + my $proj = Bio::Phylo::Factory->create_project(); + + $matrices->insert($matrix); + $proj->insert($matrix); + print $proj->to_xml(); + $self->_print($proj->to_xml()); + + return 1; + #return (Bio::Nexml::Util->write_aln(@_)); } Modified: bioperl-dev/trunk/Bio/Nexml/Util.pm =================================================================== --- bioperl-dev/trunk/Bio/Nexml/Util.pm 2009-07-14 14:39:53 UTC (rev 15851) +++ bioperl-dev/trunk/Bio/Nexml/Util.pm 2009-07-14 14:40:52 UTC (rev 15852) @@ -76,6 +76,8 @@ use strict; use Bio::Phylo::Matrices::Matrix; +use Bio::Phylo::Matrices::Datatype::Rna; + #not sure that it needs to inerhit from Bio::Nexml use base qw(Bio::Nexml); @@ -116,29 +118,32 @@ #Check if theres a row label and if not default to seqID if( !defined($rowlabel = $row->get_name())) {$rowlabel = $seqID;} - - #check if taxon linked to sequence - if(my $taxon = $row->get_taxon) - { - print $taxon->get_name(); - } - - -# I would allow the LocatableSeq constructor to handle setting start and end, -# you can leave attrs out -- UNLESS nexml has a slot for these coordinates; -# I would dig around for this. /maj - $seq = Bio::LocatableSeq->new( -seq => $newSeq, -display_id => "$seqID", #-description => $desc, -alphabet => $mol_type, ); - + my $feat; + #check if taxon linked to sequence if so create feature to attach to alignment + foreach my $taxa_o (@$taxa) + { + my $taxa_ents = $taxa_o->get_entities(); + foreach my $taxon (@$taxa_ents) + { + if($taxon eq $row->get_taxon) + { + my $taxon_name = $taxon->get_name(); + $feat = Bio::SeqFeature::Generic->new(); + $feat->add_tag_value('taxon', "$taxon_name"); + $feat->add_tag_value('id', "$seqID"); + } + } + } - #what other data is appropriate to pull over from bio::phylo::matrices::matrix?? $aln->add_seq($seq); + $aln->add_SeqFeature($feat); $self->debug("Reading r$seqID\n"); @@ -189,8 +194,12 @@ #transfer attributes that apply to all nodes #check if taxa data exists for the current node ($terminal) - if(my $taxon = $terminal->get_taxon()) { - $newNode->add_tag_value("taxon", $taxon->get_name()); + my $taxa_ents = $taxa->[0]->get_entities(); + foreach my $taxon (@$taxa_ents) + { + if($taxon eq $terminal->get_taxon()) { + $newNode->add_tag_value("taxon", $taxon->get_name()); + } } #check if you've reached the root of the tree and if so, stop. @@ -232,6 +241,7 @@ sub _make_seq { my($self, $proj) = @_; my $matrices = $proj->get_matrices(); + my $taxa = $proj->get_taxa(); my @seqs; foreach my $matrix (@$matrices) @@ -259,16 +269,7 @@ #build the seq object using the factory create method - #not sure if this is the preferred way, but couldn't get it to work - #my $seq = $self->sequence_factory->create( - # -seq => $newSeq, - # -id => $rowlabel, - # -primary_id => $seqID, - # #-desc => $fulldesc, - # -alphabet => $mol_type, - # -direct => 1, - # ); - #did this instead + my $seqbuilder = new Bio::Seq::SeqFactory('-type' => 'Bio::Seq'); my $seq = $seqbuilder->create( @@ -279,7 +280,24 @@ -alphabet => $mol_type, -direct => 1, ); - + #check if taxon linked to sequence if so create feature to attach to alignment + my $feat; + foreach my $taxa_o (@$taxa) + { + my $taxa_ents = $taxa_o->get_entities(); #TODO handle mutiple taxa + foreach my $taxon (@$taxa_ents) + { + if($taxon eq $row->get_taxon) + { + my $taxon_name = $taxon->get_name(); + $feat = Bio::SeqFeature::Generic->new(); + $feat->add_tag_value('taxon', "$taxon_name"); + $feat->add_tag_value('id', $seqID); + last; + } + } + } + $seq->add_SeqFeature($feat); push (@seqs, $seq); #what other data is appropriate to pull over from bio::phylo::matrices::matrix?? } @@ -287,8 +305,8 @@ return \@seqs; } -sub write_tree { - my ($self, $caller, $bptree) = @_; +sub create_bphylo_tree { + my ($self, $bptree) = @_; #most of the code below ripped form Bio::Phylo::Forest::Tree::new_from_bioperl()d my $tree = $fac->create_tree; @@ -309,56 +327,13 @@ $tree->set_score( $score ) if defined $score; } else { - $caller->throw('Not a bioperl tree!'); + $self->throw('Not a bioperl tree!'); } - my $ents2 = $taxa->get_entities(); - my $forest = $fac->create_forest(); - $taxa->set_forest($forest); - $forest->insert($tree); - my $proj = $fac->create_project(); - $proj->insert($forest); - - my $ents = $taxa->get_entities(); - - - - #$caller->_print($taxa->to_xml()); - #$caller->_print($forest->to_xml()); - $caller->_print(''); - $caller->_print("\n"); - $caller->_print($proj->to_xml); - return $tree; + return $tree, $taxa; } -sub _create_phylo_tree { - my ($self, $caller, $bptree) = @_; - #most of the code below ripped form Bio::Phylo::Forest::Tree::new_from_bioperl()d - - my $tree = $fac->create_tree; - my $taxa = $fac->create_taxa; - my $class = 'Bio::Phylo::Forest::Tree'; - - if ( Scalar::Util::blessed $bptree && $bptree->isa('Bio::Tree::TreeI') ) { - bless $tree, $class; - ($tree, $taxa) = _copy_tree( $tree, $bptree->get_root_node, "", $taxa); - - # copy name - my $name = $bptree->id; - $tree->set_name( $name ) if defined $name; - - # copy score - my $score = $bptree->score; - $tree->set_score( $score ) if defined $score; - } - else { - $caller->throw('Not a bioperl tree!'); - } - - return $tree; -} - sub _copy_tree { my ( $tree, $bpnode, $parent, $taxa ) = @_; my $node = Bio::Phylo::Forest::Node->new_from_bioperl($bpnode); @@ -366,6 +341,7 @@ if ($parent) { $parent->set_child($node); } + #TODO get taxa label and find a way to relate it to the bioperl tag values so they can be retrieved on the other end if (my $bptaxon = $bpnode->get_tag_values('taxon')) { $taxon = $fac->create_taxon(-name => $bptaxon); @@ -379,9 +355,9 @@ return $tree, $taxa; } -sub write_aln { +sub create_bphylo_aln { - my ($self, $caller, $aln, @args) = @_; + my ($self, $aln, @args) = @_; #most of the code below ripped from Bio::Phylo::Matrices::Matrix::new_from_bioperl() my $factory = Bio::Phylo::Factory->new(); @@ -397,6 +373,7 @@ else { $type = 'dna'; } + my $matrix = $factory->create_matrix( '-type' => $type, '-special_symbols' => { @@ -410,26 +387,33 @@ for my $field ( qw(description accession id annotation consensus_meta score source) ) { $matrix->$field( $aln->$field ); } - my $to = $matrix->get_type_object; + my $to = $matrix->get_type_object; + my @feats = $aln->get_all_SeqFeatures(); + my $taxa = $factory->create_taxa(); for my $seq ( @seqs ) { - my $datum = Bio::Phylo::Matrices::Datum->new_from_bioperl( - $seq, '-type_object' => $to - ); + #create taxa + + my $datum = create_bphylo_datum($seq, \@feats, $taxa, '-type_object' => $to); $matrix->insert($datum); } - $self->_print($matrix->to_xml()); - return $matrix; + #$self->_print($matrix->to_xml()); + return $matrix, $taxa; } else { $self->throw('Not a bioperl alignment!'); } } -sub write_seq { - my ($self, $caller, $seq, @args) = @_; +sub create_bphylo_seq { + my ($self, $seq, @args) = @_; my $type = $seq->alphabet || $seq->_guess_alphabet || 'dna'; $type = uc($type); - my $dat = $fac->create_datum( '-type' => $type); + #my $dat = $fac->create_datum( '-type' => $type); + + my @feats = $seq->get_all_SeqFeatures(); + my $taxa = $fac->create_taxa(); + + my $dat = create_bphylo_datum($seq, \@feats, $taxa, '-type' => $type); # copy seq string my $seqstring = $seq->seq; @@ -437,7 +421,7 @@ eval { $dat->set_char( $seqstring ) }; #TODO Test debuggin if ( $@ and UNIVERSAL::isa($@,'Bio::Phylo::Util::Exceptions::InvalidData') ) { - $caller->throw( + $self->throw( "\nAn exception of type Bio::Phylo::Util::Exceptions::InvalidData was caught\n\n". $@->description . "\n\nThe BioPerl sequence object contains invalid data ($seqstring)\n" . @@ -451,38 +435,92 @@ # copy name my $name = $seq->display_id; - $dat->set_name( $name ) if defined $name; + #$dat->set_name( $name ) if defined $name; # copy desc my $desc = $seq->desc; $dat->set_desc( $desc ) if defined $desc; #get features from SeqFeatureI - #TODO test SeqFeatures - if (my $feat = $seq->get_SeqFeatures()) { - - my $start = $feat->start; - $dat->start($start) if defined $start; - - my $end = $feat->end; - $dat->end($start) if defined $end; - - my $strand = $feat->strand; - $dat->strand($start) if defined $strand; - } + for my $field ( qw(start end strand) ) { + $dat->$field( $seq->$field ) if $seq->can($field); + } my $matrix = $fac->create_matrix(-type => $type); $matrix->set_name($seq->display_name()); + print $dat->to_xml(); $matrix->insert($dat); - my $proj = $fac->create_project(); - $proj->insert($matrix); + #my $proj = $fac->create_project(); + #$proj->insert($matrix); @@ Diff output truncated at 10000 characters. @@ From bugzilla-daemon at portal.open-bio.org Tue Jul 14 12:39:00 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 14 Jul 2009 12:39:00 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907141639.n6EGd0V5024634@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #8 from tristan.lefebure at gmail.com 2009-07-14 12:38 EST ------- (In reply to comment #7) I'm still confused about the validity of (B,C,(A,D))68 , but for sure this is ugly, and many phylogeneticist and tree editors will have problem reading it. So yes, valid or not, it would be better to avoid it. Let's go back from this extremely simple example: The original unrooted tree is: A B \ 68 / \h______g/ / \ / \ D C Once imported in bioperl it is rooted with h, and the bootstrap score is stored within g: D A B C \ \ \ | \ \ \ | \ \ \ | \ \ \ | \ \ \| \ \ +g \ \ | \ \ |find_node(-id =>"A"); my $nodeD = $t->find_node(-id =>"D"); my $h = $t->get_lca(($nodeA,$nodeD)); my $nodeB = $t->find_node(-id =>"B"); my $nodeC = $t->find_node(-id =>"C"); my $g = $t->get_lca(($nodeB,$nodeC)); print "Bootstrap of h is ", $h->bootstrap, "\n"; print "Bootstrap of g is ", $g->bootstrap, "\n"; ##-> Bootstrap of h is ##-> Bootstrap of g is 68 That's all good since in bioperl, the bootstrap of a branch is stored in the descendant node of that branch. Now, if we root the tree with g we get: B C A D \ \ \ | \ \ \ | \ \ \ | \ \ \ | \ \ \| \ \ +h \ \ | \ \ | \ \ | \ \| \----+g bootstrap, "\n"; print "Bootstrap of g is ", $g->bootstrap, "\n"; ##-> Bootstrap of h is ##-> Bootstrap of g is 68 This is were I am confused, I believe it should be stored in h, as g is the ancestor of h. By looking at the reroot() code, I might have spotted the problem, which would explain why (i) it only concerns the bootstrap score that is attached to the new root (as observed using test on bigger trees), and (ii) why we do not observe the same problem with the branch length. TreeFunctionsI.pm, line 952 to 965: my $tmp_node = $new_root->create_node_on_branch(-position=>0,-force=>1); # reverse the ancestor & children pointers my $former_anc = $tmp_node->ancestor; my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node); for (my $i = 0; $i < @path_from_oldroot - 1; $i++) { my $current = $path_from_oldroot[$i]; my $next = $path_from_oldroot[$i + 1]; $current->remove_Descendent($next); $current->branch_length($next->branch_length); $current->bootstrap($next->bootstrap) if defined $next->bootstrap; $next->remove_tag('B'); $next->add_Descendent($current); } So here you create a temporary node, between h and g, with the branch ($tmp_node - $g) equal to zero. In other words, the branch length was transmitted from $g to $tmp_node, but not the bootstrap score. You then reverse the ancestor and children pointers, and transfer the branch length and bootstrap scores along the path between $h and $tmp_node (here there is only one branch). Finally, $tmp_node is removed. Works great except that $g bootstrap transfert has been missed. What about: - modifying create_node_on_branch() to allow the transfert of the bootstrap score - or include something like that in reroot at line 953: $tmp_node->bootstrap($new_root->bootstrap) if defined $new_root->bootstrap; What do you think? (sorry for the lengthy comment!) > I see your point. If you saw a tree marked like that in a paper, you'd say > "Whaa?" If the tree in a paper were unrooted, one would say, "Ah, the partition > defined by this branch has 68% 'support'" or whatever. Now, in the script we've > just been playing with the root, saying "let's pick up the tree here, and let > the rest hang down", and when we flatten the tree to a linear text > representation, the root gets spread out over the whole text, viz. as > parentheses at both ends, with the bootstrap value hanging off the side. This > is ugly, but still not invalid I think. > > But I'm not trying to get out of any work-- What we can do is modify the > algorithm to say "if a bootstrap value gets shoved to the root during > rerooting, shove it back to the node at the other end of the branch." That > would give us a canonical form that's less off-putting...sound good? > -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From scain at dev.open-bio.org Tue Jul 14 13:42:10 2009 From: scain at dev.open-bio.org (Scott Cain) Date: Tue, 14 Jul 2009 13:42:10 -0400 Subject: [Bioperl-guts-l] [15853] bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI: fixed typo in sql Message-ID: <200907141742.n6EHgAE7022012@dev.open-bio.org> Revision: 15853 Author: scain Date: 2009-07-14 13:42:10 -0400 (Tue, 14 Jul 2009) Log Message: ----------- fixed typo in sql Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm 2009-07-14 14:40:52 UTC (rev 15852) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm 2009-07-14 17:42:10 UTC (rev 15853) @@ -552,7 +552,7 @@ AND ($sql_regexp) END $sql .= "LIMIT $limit" if defined $limit; - $self->_print_query($sql, at tags, at words) if DEBUG || $self->debug; + $self->_print_query($sql, at tags, at wild_card_words) if DEBUG || $self->debug; my $sth = $self->_prepare($sql); $sth->execute(@tags, at wild_card_words) or $self->throw($sth->errstr); Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2009-07-14 14:40:52 UTC (rev 15852) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2009-07-14 17:42:10 UTC (rev 15853) @@ -462,7 +462,7 @@ my $dbh = $self->dbh; my $attributelist_table = $self->_attributelist_table; - my $a = $dbh->selectcol_arrayref("SELECT tag FROM$attributelist_table") + my $a = $dbh->selectcol_arrayref("SELECT tag FROM $attributelist_table") or $self->throw($dbh->errstr); return @$a; } From lstein at dev.open-bio.org Tue Jul 14 17:35:39 2009 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Tue, 14 Jul 2009 17:35:39 -0400 Subject: [Bioperl-guts-l] [15854] bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI: added SQLite adaptor from Nathan Weeks - doesn' t pass all regression tests yet Message-ID: <200907142135.n6ELZdDj022488@dev.open-bio.org> Revision: 15854 Author: lstein Date: 2009-07-14 17:35:39 -0400 (Tue, 14 Jul 2009) Log Message: ----------- added SQLite adaptor from Nathan Weeks - doesn't pass all regression tests yet Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm Added Paths: ----------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm Added: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm (rev 0) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm 2009-07-14 21:35:39 UTC (rev 15854) @@ -0,0 +1,916 @@ +package Bio::DB::SeqFeature::Store::DBI::SQLite; + +#$Id$ + +=head1 NAME + +Bio::DB::SeqFeature::Store::DBI::SQLite -- SQLite implementation of Bio::DB::SeqFeature::Store + +=head1 SYNOPSIS + + use Bio::DB::SeqFeature::Store; + + # Open the sequence database + my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::SQLite', + -dsn => '/path/to/database.db'); + + # get a feature from somewhere + my $feature = Bio::SeqFeature::Generic->new(...); + + # store it + $db->store($feature) or die "Couldn't store!"; + + # primary ID of the feature is changed to indicate its primary ID + # in the database... + my $id = $feature->primary_id; + + # get the feature back out + my $f = $db->fetch($id); + + # change the feature and update it + $f->start(100); + $db->update($f) or die "Couldn't update!"; + + # searching... + # ...by id + my @features = $db->fetch_many(@list_of_ids); + + # ...by name + @features = $db->get_features_by_name('ZK909'); + + # ...by alias + @features = $db->get_features_by_alias('sma-3'); + + # ...by type + @features = $db->get_features_by_name('gene'); + + # ...by location + @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000); + + # ...by attribute + @features = $db->get_features_by_attribute({description => 'protein kinase'}) + + # ...by the GFF "Note" field + @result_list = $db->search_notes('kinase'); + + # ...by arbitrary combinations of selectors + @features = $db->features(-name => $name, + -type => $types, + -seq_id => $seqid, + -start => $start, + -end => $end, + -attributes => $attributes); + + # ...using an iterator + my $iterator = $db->get_seq_stream(-name => $name, + -type => $types, + -seq_id => $seqid, + -start => $start, + -end => $end, + -attributes => $attributes); + + while (my $feature = $iterator->next_seq) { + # do something with the feature + } + + # ...limiting the search to a particular region + my $segment = $db->segment('Chr1',5000=>6000); + my @features = $segment->features(-type=>['mRNA','match']); + + # getting & storing sequence information + # Warning: this returns a string, and not a PrimarySeq object + $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...'); + my $sequence = $db->fetch_sequence('Chr1',5000=>6000); + + # what feature types are defined in the database? + my @types = $db->types; + + # create a new feature in the database + my $feature = $db->new_feature(-primary_tag => 'mRNA', + -seq_id => 'chr3', + -start => 10000, + -end => 11000); + +=head1 DESCRIPTION + +Bio::DB::SeqFeature::Store::SQLite is the SQLite adaptor for +Bio::DB::SeqFeature::Store. You will not create it directly, but +instead use Bio::DB::SeqFeature::Store-Enew() to do so. + +See L for complete usage instructions. + +=head2 Using the SQLite adaptor + +To establish a connection to the database, call +Bio::DB::SeqFeature::Store-Enew(-adaptor=E'DBI::SQLite', at more_args). The +additional arguments are as follows: + + Argument name Description + ------------- ----------- + + -dsn The path to the SQLite database file. + + -namespace A prefix to attach to each table. This allows you + to have several virtual databases in the same + physical database. + + -temp Boolean flag. If true, a temporary database + will be created and destroyed as soon as + the Store object goes out of scope. (synonym -temporary) + + -autoindex Boolean flag. If true, features in the database will be + reindexed every time they change. This is the default. + + + -tmpdir Directory in which to place temporary files during "fast" loading. + Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp) + + -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes." + (synonyms -options, -dbi_attr) + + -write Pass true to open database for writing or updating. + +If successful, a new instance of +Bio::DB::SeqFeature::Store::DBI::SQLite will be returned. + +In addition to the standard methods supported by all well-behaved +Bio::DB::SeqFeature::Store databases, several following +adaptor-specific methods are provided. These are described in the next +sections. + +=cut + +use strict; + +use base 'Bio::DB::SeqFeature::Store::DBI::mysql'; +use Bio::DB::SeqFeature::Store::DBI::Iterator; +use DBI qw(:sql_types); +use Memoize; +use Cwd 'abs_path'; +use Bio::DB::GFF::Util::Rearrange 'rearrange'; +use Bio::SeqFeature::Lite; +use File::Spec; +use constant DEBUG=>0; + +# Using same limits as MySQL adaptor so I don't have to make something up. +use constant MAX_INT => 2_147_483_647; +use constant MIN_INT => -2_147_483_648; +use constant MAX_BIN => 1_000_000_000; # size of largest feature = 1 Gb +use constant MIN_BIN => 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these + +### +# object initialization +# +sub init { + my $self = shift; + my ($dsn, + $is_temporary, + $autoindex, + $namespace, + $dump_dir, + $user, + $pass, + $dbi_options, + $writeable, + $create, + ) = rearrange(['DSN', + ['TEMP','TEMPORARY'], + 'AUTOINDEX', + 'NAMESPACE', + ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'], + 'USER', + ['PASS','PASSWD','PASSWORD'], + ['OPTIONS','DBI_OPTIONS','DBI_ATTR'], + ['WRITE','WRITEABLE'], + 'CREATE', + ], at _); + $dbi_options ||= {}; + $writeable = 1 if $is_temporary or $dump_dir; + + $dsn or $self->throw("Usage: ".__PACKAGE__."->init(-dsn => \$dbh || \$dsn)"); + + my $dbh; + if (ref $dsn) { + $dbh = $dsn; + } else { + $dsn = "dbi:SQLite:$dsn" unless $dsn =~ /^dbi:/; + $dbh = DBI->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr); + $dbh->do("PRAGMA synchronous = OFF;"); # makes writes much faster + $dbh->do("PRAGMA temp_store = MEMORY;"); # less disk I/O; some speedup + } + $self->{dbh} = $dbh; + $self->{is_temp} = $is_temporary; + $self->{namespace} = $namespace; + $self->{writeable} = $writeable; + + $self->default_settings; + $self->autoindex($autoindex) if defined $autoindex; + $self->dumpdir($dump_dir) if $dump_dir; + if ($self->is_temp) { + $self->init_tmp_database(); + } elsif ($create) { + $self->init_database('erase'); + } +} + +sub table_definitions { + my $self = shift; + return { + feature => < < < < < < < < <dbh; + my $dir = $self->{dumpdir} || '.'; + + $dbh->begin_work; # making this a transaction greatly improves performance + + for my $table ('feature', $self->index_tables) { + my $fh = $self->dump_filehandle($table); + my $path = $self->dump_path($table); + $fh->close; + open($fh, $path); + my $qualified_table = $self->_qualify($table); + + my $sth; + if ($table eq 'feature') { + $sth = $dbh->prepare("REPLACE INTO $qualified_table VALUES (?,?,?,?,?,?,?,?,?,?)"); + + while (<$fh>) { + chomp(); + my ($id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$obj) = + split(/\t/); + $sth->bind_param(1, $id); + $sth->bind_param(2, $typeid); + $sth->bind_param(3, $seqid); + $sth->bind_param(4, $start); + $sth->bind_param(5, $end); + $sth->bind_param(6, $strand); + $sth->bind_param(7, $tier); + $sth->bind_param(8, $bin); @@ Diff output truncated at 10000 characters. @@ From lstein at dev.open-bio.org Tue Jul 14 23:21:21 2009 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Tue, 14 Jul 2009 23:21:21 -0400 Subject: [Bioperl-guts-l] [15855] bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI: fixed attribute search Message-ID: <200907150321.n6F3LLwe022960@dev.open-bio.org> Revision: 15855 Author: lstein Date: 2009-07-14 23:21:21 -0400 (Tue, 14 Jul 2009) Log Message: ----------- fixed attribute search Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm 2009-07-14 21:35:39 UTC (rev 15854) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/SQLite.pm 2009-07-15 03:21:21 UTC (rev 15855) @@ -560,6 +560,14 @@ return $iterator ? Bio::DB::SeqFeature::Store::DBI::Iterator->new($sth,$self) : $self->_sth2objs($sth); } +sub _make_attribute_group { + my $self = shift; + my ($table_name,$attributes) = @_; + my $key_count = keys %$attributes or return; + my $count = $key_count-1; + return "f.id HAVING count(f.id)>$count"; +} + # Do a case-insensitive search a la the PostgreSQL adaptor sub _name_sql { my $self = shift; Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2009-07-14 21:35:39 UTC (rev 15854) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2009-07-15 03:21:21 UTC (rev 15855) @@ -422,25 +422,12 @@ my $table = $self->_qualify($_); $dbh->do("DROP table IF EXISTS $table") if $erase; my $query = "CREATE TABLE IF NOT EXISTS $table $tables->{$_}"; - for my $q (split ';',$query) { - chomp($q); - next unless $q =~ /\S/; - $dbh->do("$q;\n") or $self->throw($dbh->errstr); - } + $self->_create_table($dbh,$query); } $self->subfeatures_are_indexed(1) if $erase; 1; } -sub maybe_create_meta { - my $self = shift; - return unless $self->writeable; - my $table = $self->_qualify('meta'); - my $tables = $self->table_definitions; - my $temporary = $self->is_temp ? 'TEMPORARY' : ''; - $self->dbh->do("CREATE $temporary TABLE IF NOT EXISTS $table $tables->{meta}"); -} - sub init_tmp_database { my $self = shift; my $dbh = $self->dbh; @@ -449,11 +436,30 @@ next if $t eq 'meta'; # done earlier my $table = $self->_qualify($t); my $query = "CREATE TEMPORARY TABLE $table $tables->{$t}"; - $dbh->do($query) or $self->throw($dbh->errstr); + $self->_create_table($dbh,$query); } 1; } +sub _create_table { + my $self = shift; + my ($dbh,$query) = @_; + for my $q (split ';',$query) { + chomp($q); + next unless $q =~ /\S/; + $dbh->do("$q;\n") or $self->throw($dbh->errstr); + } +} + +sub maybe_create_meta { + my $self = shift; + return unless $self->writeable; + my $table = $self->_qualify('meta'); + my $tables = $self->table_definitions; + my $temporary = $self->is_temp ? 'TEMPORARY' : ''; + $self->dbh->do("CREATE $temporary TABLE IF NOT EXISTS $table $tables->{meta}"); +} + ### # use temporary tables # From bugzilla-daemon at portal.open-bio.org Wed Jul 15 10:28:07 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 10:28:07 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151428.n6FES7FQ001037@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #9 from maj at fortinbras.us 2009-07-15 10:28 EST ------- Tristan-- thanks a lot for all this work-- I think we've got it now: please look at the following output: reroot on B:((C:50,(A:52,D:70):11)68:46)B; roundtrip:(A:52,(B:46,C:50)68:11,D:70); with fake midpoint node:(A:52,(C:50,(B:23)fake:23)68:11,D:70); reroot on fake node:(B:23,(C:50,(A:52,D:70):11)68:23)fake; clear id on fake node:(B:23,(C:50,(A:52,D:70):11)68:23); roundtrip (remove fake node):(A:52,(B:23,C:50)68:11,D:70); -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Wed Jul 15 10:51:31 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 10:51:31 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151451.n6FEpVMg001832@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #10 from tristan.lefebure at gmail.com 2009-07-15 10:51 EST ------- (In reply to comment #9) Well, I think the following trees have problems: >reroot on B:((C:50,(A:52,D:70):11)68:46)B; >reroot on fake node:(B:23,(C:50,(A:52,D:70):11)68:23)fake; >clear id on fake node:(B:23,(C:50,(A:52,D:70):11)68:23); The bootpstrap score should be attached to (B,C)68 or (A,D)68. Here we have (C,(A,D))68 which is equivalent to (B)68. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Wed Jul 15 10:53:54 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 10:53:54 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151453.n6FErsvb001952@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #11 from maj at fortinbras.us 2009-07-15 10:53 EST ------- with a new argument to splice() to preserve branch lengths when removing nodes, and testing a new convenience function "reroot_at_midpoint" to provide a reroot that people might be used to: before reroot:(A:52,(B:46,C:50)68:11,D:70); reroot on B:((C:50,(A:52,D:70):11)68:46)B; roundtrip:(A:52,(B:46,C:50)68:11,D:70); with fake midpoint node:(A:52,(C:50,(B:23)fake:23)68:11,D:70); reroot on fake node:(B:23,(C:50,(A:52,D:70):11)68:23)fake; clear id on fake node:(B:23,(C:50,(A:52,D:70):11)68:23); roundtrip (remove fake node):(A:52,(B:46,C:50)68:11,D:70); reroot_at_midpt:(B:23,(C:50,(A:52,D:70):11)68:23); -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Wed Jul 15 10:56:54 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 10:56:54 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151456.n6FEuspJ002044@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #12 from maj at fortinbras.us 2009-07-15 10:56 EST ------- If you want that, I think you have to remove the polytomy from the original tree root. If you want A and D grouped explicitly, do (A:52,(B:46,C:50)68:11,D:70) ( (A:52,D:70),(B:46,C:50)68:11 ) which makes the polytomy soft. (In reply to comment #10) > (In reply to comment #9) > > Well, I think the following trees have problems: > > >reroot on B:((C:50,(A:52,D:70):11)68:46)B; > >reroot on fake node:(B:23,(C:50,(A:52,D:70):11)68:23)fake; > >clear id on fake node:(B:23,(C:50,(A:52,D:70):11)68:23); > > The bootpstrap score should be attached to (B,C)68 or (A,D)68. Here we have > (C,(A,D))68 which is equivalent to (B)68. > -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From maj at dev.open-bio.org Wed Jul 15 11:10:26 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 15 Jul 2009 11:10:26 -0400 Subject: [Bioperl-guts-l] [15856] bioperl-dev/trunk/Bio: keyword subst Message-ID: <200907151510.n6FFAQZE024786@dev.open-bio.org> Revision: 15856 Author: maj Date: 2009-07-15 11:10:25 -0400 (Wed, 15 Jul 2009) Log Message: ----------- keyword subst Modified Paths: -------------- bioperl-dev/trunk/Bio/AlignIO/nexml.pm bioperl-dev/trunk/Bio/Nexml/Util.pm bioperl-dev/trunk/Bio/Nexml.pm bioperl-dev/trunk/Bio/SeqIO/nexml.pm bioperl-dev/trunk/Bio/TreeIO/nexml.pm Property Changed: ---------------- bioperl-dev/trunk/Bio/AlignIO/nexml.pm bioperl-dev/trunk/Bio/Nexml/Util.pm bioperl-dev/trunk/Bio/Nexml.pm bioperl-dev/trunk/Bio/SeqIO/nexml.pm bioperl-dev/trunk/Bio/TreeIO/nexml.pm Modified: bioperl-dev/trunk/Bio/AlignIO/nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/AlignIO/nexml.pm 2009-07-15 03:21:21 UTC (rev 15855) +++ bioperl-dev/trunk/Bio/AlignIO/nexml.pm 2009-07-15 15:10:25 UTC (rev 15856) @@ -1,4 +1,4 @@ -# $Id: +# $Id$ # # BioPerl module for Bio::AlignIO::nexml # Property changes on: bioperl-dev/trunk/Bio/AlignIO/nexml.pm ___________________________________________________________________ Name: svn:keywords + Id Date Author Rev Modified: bioperl-dev/trunk/Bio/Nexml/Util.pm =================================================================== --- bioperl-dev/trunk/Bio/Nexml/Util.pm 2009-07-15 03:21:21 UTC (rev 15855) +++ bioperl-dev/trunk/Bio/Nexml/Util.pm 2009-07-15 15:10:25 UTC (rev 15856) @@ -1,3 +1,4 @@ +# $Id$ # # BioPerl module for Bio::TreeIO::nexml # Property changes on: bioperl-dev/trunk/Bio/Nexml/Util.pm ___________________________________________________________________ Name: svn:keywords + Id Date Author Rev Modified: bioperl-dev/trunk/Bio/Nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/Nexml.pm 2009-07-15 03:21:21 UTC (rev 15855) +++ bioperl-dev/trunk/Bio/Nexml.pm 2009-07-15 15:10:25 UTC (rev 15856) @@ -1,3 +1,4 @@ +# $Id$ # BioPerl module for Bio::Nexml # # Please direct questions and support issues to @@ -284,4 +285,4 @@ return $label_str; } -1; \ No newline at end of file +1; Property changes on: bioperl-dev/trunk/Bio/Nexml.pm ___________________________________________________________________ Name: svn:keywords + Id Date Author Rev Modified: bioperl-dev/trunk/Bio/SeqIO/nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/SeqIO/nexml.pm 2009-07-15 03:21:21 UTC (rev 15855) +++ bioperl-dev/trunk/Bio/SeqIO/nexml.pm 2009-07-15 15:10:25 UTC (rev 15856) @@ -1,4 +1,4 @@ -# $Id: fasta.pm 15549 2009-02-21 00:48:48Z maj $ +# $Id$ # BioPerl module for Bio::SeqIO::nexml # # Please direct questions and support issues to @@ -79,6 +79,8 @@ use lib '../..'; use Bio::Phylo::Matrices::Datum; +# may want to call these directly off the class below, for ease of +# reading later - /maj use Bio::Phylo::IO qw (parse unparse); use Bio::Seq; use Bio::Seq::SeqFactory; @@ -120,6 +122,8 @@ # + # i.e., my $proj = Bio::Phylo::IO->parse(...); /maj + my $proj = parse( '-file' => $self->{'_file'}, '-format' => 'nexml', Property changes on: bioperl-dev/trunk/Bio/SeqIO/nexml.pm ___________________________________________________________________ Name: svn:keywords + Id Date Author Rev Modified: bioperl-dev/trunk/Bio/TreeIO/nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/TreeIO/nexml.pm 2009-07-15 03:21:21 UTC (rev 15855) +++ bioperl-dev/trunk/Bio/TreeIO/nexml.pm 2009-07-15 15:10:25 UTC (rev 15856) @@ -1,4 +1,4 @@ -# $Id: nexml.pm +# $Id$ # # BioPerl module for Bio::TreeIO::nexml # Property changes on: bioperl-dev/trunk/Bio/TreeIO/nexml.pm ___________________________________________________________________ Name: svn:keywords + Id Date Author Rev From bugzilla-daemon at portal.open-bio.org Wed Jul 15 11:43:12 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 11:43:12 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151543.n6FFhCfN003605@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #13 from tristan.lefebure at gmail.com 2009-07-15 11:43 EST ------- (In reply to comment #12) A bootstrap score is a bipartition frequency, which here, formatted into phylip/paup spirit gives: ABCD .**. 68 The presence or absence of the original polytomy does nothing to this bipartition, and so, should not impact downstream results. Also, most tree search program export unrooted trees with a polytomy at the base of the tree. It would be cumbersome not to support them... In my view, the assignment of the score 68 to any other bipartition is wrong. For example, the following bipartion found in some of the test trees you reported is different from the original one, and is therefore, I believe, invalid: ABCD .*.. 68 Sorry to be so picky, but I'm still not convinced reroot() works well... -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Wed Jul 15 11:57:06 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 11:57:06 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151557.n6FFv6c5004196@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 ------- Comment #14 from maj at fortinbras.us 2009-07-15 11:57 EST ------- Then the problem lies with the newick format, which obviously cannot represent bipartitions when not all nodes bifurcate, coupled with the way trees are represented in bioperl, which does not contain an explicit branch object. So perhaps the underlying problem is really that BioPerl needs an enhanced representation of trees. This is a very reasonable enhancement and I would encourage submitting it to bugzilla. Unfortunately, I can't spend any more time on this. I will commit the changes I've made and close the bug. Please open a new bug to contain the continuing issues you have. cheers (In reply to comment #13) > (In reply to comment #12) > > A bootstrap score is a bipartition frequency, which here, formatted into > phylip/paup spirit gives: > > ABCD > .**. 68 > > The presence or absence of the original polytomy does nothing to this > bipartition, and so, should not impact downstream results. Also, most tree > search program export unrooted trees with a polytomy at the base of the tree. > It would be cumbersome not to support them... > > In my view, the assignment of the score 68 to any other bipartition is wrong. > For example, the following bipartion found in some of the test trees you > reported is different from the original one, and is therefore, I believe, > invalid: > > ABCD > .*.. 68 > > Sorry to be so picky, but I'm still not convinced reroot() works well... > -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From maj at dev.open-bio.org Wed Jul 15 11:58:21 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 15 Jul 2009 11:58:21 -0400 Subject: [Bioperl-guts-l] [15857] bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm: finishing bug 2877, with an addl convenience function Message-ID: <200907151558.n6FFwLSU025068@dev.open-bio.org> Revision: 15857 Author: maj Date: 2009-07-15 11:58:21 -0400 (Wed, 15 Jul 2009) Log Message: ----------- finishing bug 2877, with an addl convenience function Modified Paths: -------------- bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm Modified: bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm =================================================================== --- bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm 2009-07-15 15:10:25 UTC (rev 15856) +++ bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm 2009-07-15 15:58:21 UTC (rev 15857) @@ -244,7 +244,7 @@ sub splice { my ($self, @args) = @_; $self->throw("Must supply some arguments") unless @args > 0; - + my $preserve_lengths = 0; my @nodes_to_remove; if (ref($args[0])) { $self->throw("When supplying just a list of Nodes, they must be Bio::Tree::NodeI objects") unless $args[0]->isa('Bio::Tree::NodeI'); @@ -270,6 +270,9 @@ push(@keep_nodes, $self->find_node($key => $value)); } } + elsif ($key =~ /preserve/) { + $preserve_lengths = $value; + } } if ($remove_all) { @@ -318,6 +321,7 @@ # no ancestor of our own to remove us from the tree foreach my $desc (@descs) { $desc->ancestor($ancestor); + $desc->branch_length($desc->branch_length + $node->branch_length) if $preserve_lengths; } $node->ancestor(undef); } @@ -950,11 +954,10 @@ $self->warn("Node requested for reroot is already the root node!"); return 0; } my $tmp_node = $new_root->create_node_on_branch(-position=>0,-force=>1); - # reverse the ancestor & children pointers my $former_anc = $tmp_node->ancestor; my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node); - for (my $i = 0; $i < @path_from_oldroot - 1; $i++) { + for (my $i = 0; $i < $#path_from_oldroot; $i++) { my $current = $path_from_oldroot[$i]; my $next = $path_from_oldroot[$i + 1]; $current->remove_Descendent($next); @@ -969,6 +972,7 @@ $tmp_node = undef; $new_root->branch_length(undef); + $new_root->remove_tag('B'); $old_root = undef; $self->set_root_node($new_root); @@ -976,6 +980,37 @@ return 1; } +=head2 reroot_at_midpoint + + Title : reroot_at_midpoint + Usage : $tree->reroot_at_midpoint($node, $new_root_id); + Function: Reroots a tree on a new node created halfway between the + argument and its ancestor + Returns : the new midpoint Bio::Tree::NodeIon success, 0 on failure + Args : non-root Bio::Tree::NodeI currently in $tree + scalar string, id for new node (optional) + +=cut + +sub reroot_at_midpoint { + my $self = shift; + my $node = shift; + my $id = shift; + + unless (defined $node && $node->isa("Bio::Tree::NodeI")) { + $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting"); + return 0; + } + + my $midpt = $node->create_node_on_branch(-FRACTION=>0.5); + if (defined $id) { + $self->warn("ID argument is not a scalar") if (ref $id); + $midpt->id($id) if defined($id) && !ref($id); + } + $self->reroot($midpt); + return $midpt; +} + =head2 findnode_by_id Title : findnode_by_id From bugzilla-daemon at portal.open-bio.org Wed Jul 15 11:59:02 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 11:59:02 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151559.n6FFx27a004459@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 maj at fortinbras.us changed: What |Removed |Added ---------------------------------------------------------------------------- Status|REOPENED |RESOLVED Resolution| |FIXED ------- Comment #15 from maj at fortinbras.us 2009-07-15 11:59 EST ------- committed at r15857 -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Wed Jul 15 11:59:16 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Jul 2009 11:59:16 -0400 Subject: [Bioperl-guts-l] [Bug 2877] [Bio::Tree::Tree] some bootstrap scores assigned to the wrong node after root() In-Reply-To: Message-ID: <200907151559.n6FFxG50004488@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2877 maj at fortinbras.us changed: What |Removed |Added ---------------------------------------------------------------------------- Status|RESOLVED |CLOSED -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. You are the assignee for the bug, or are watching the assignee. From maj at dev.open-bio.org Wed Jul 15 12:10:15 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 15 Jul 2009 12:10:15 -0400 Subject: [Bioperl-guts-l] [15858] bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm: pod for new splice() arg -preserve_lengths Message-ID: <200907151610.n6FGAFTx025144@dev.open-bio.org> Revision: 15858 Author: maj Date: 2009-07-15 12:10:14 -0400 (Wed, 15 Jul 2009) Log Message: ----------- pod for new splice() arg -preserve_lengths Modified Paths: -------------- bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm Modified: bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm =================================================================== --- bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm 2009-07-15 15:58:21 UTC (rev 15857) +++ bioperl-live/trunk/Bio/Tree/TreeFunctionsI.pm 2009-07-15 16:10:14 UTC (rev 15858) @@ -239,6 +239,11 @@ (-keep_id => [2]) will remove all nodes unless they have an id() of '2' (note, no -remove_*). + -preserve_lengths => 1 : setting this argument will splice out + intermediate nodes, preserving the original total length between + the ancestor and the descendants of the spliced node. Undef + by default. + =cut sub splice { From cjfields at dev.open-bio.org Wed Jul 15 17:54:00 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 15 Jul 2009 17:54:00 -0400 Subject: [Bioperl-guts-l] [15859] bioperl-live/trunk/Bio/SeqIO/scf.pm: * doc fixes, courtesy of Adam Sj?\195?\184gren Message-ID: <200907152154.n6FLs0tW025635@dev.open-bio.org> Revision: 15859 Author: cjfields Date: 2009-07-15 17:54:00 -0400 (Wed, 15 Jul 2009) Log Message: ----------- * doc fixes, courtesy of Adam Sj?\195?\184gren Modified Paths: -------------- bioperl-live/trunk/Bio/SeqIO/scf.pm Modified: bioperl-live/trunk/Bio/SeqIO/scf.pm =================================================================== --- bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-15 16:10:14 UTC (rev 15858) +++ bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-15 21:54:00 UTC (rev 15859) @@ -579,7 +579,7 @@ =head2 write_seq - Title : write_seq(-Quality => $swq, ) + Title : write_seq(-target => $swq, ) Usage : $obj->write_seq( -target => $swq, -version => 2, @@ -602,7 +602,7 @@ c) peak indices d) traces - You _can_ write an scf with just a and b by passing in a - SequenceWithQuality object- false traces will be synthesized + Bio::Seq::Quality object- false traces will be synthesized for you. =cut From cjfields at dev.open-bio.org Thu Jul 16 14:23:55 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Thu, 16 Jul 2009 14:23:55 -0400 Subject: [Bioperl-guts-l] [15860] bioperl-live/trunk/AUTHORS: * some rearranging of core authors Message-ID: <200907161823.n6GINt8A027474@dev.open-bio.org> Revision: 15860 Author: cjfields Date: 2009-07-16 14:23:53 -0400 (Thu, 16 Jul 2009) Log Message: ----------- * some rearranging of core authors * Ewan is moved to previous coordinators, Aaron moved up to core, Mark added to core, core is alphabetized (sorry Lincoln! Sendu is #1!) Modified Paths: -------------- bioperl-live/trunk/AUTHORS Modified: bioperl-live/trunk/AUTHORS =================================================================== --- bioperl-live/trunk/AUTHORS 2009-07-15 21:54:00 UTC (rev 15859) +++ bioperl-live/trunk/AUTHORS 2009-07-16 18:23:53 UTC (rev 15860) @@ -4,30 +4,34 @@ =over -=item * Ewan Birney +=item * Sendu Bala =item * Chris Dagdigian +=item * Christopher Fields + +=item * Mark Jensen + =item * Hilmar Lapp =item * Heikki Lehv\xE4slaiho +=item * Aaron Mackey + +=item * Brian Osborne + =item * Jason Stajich =item * Lincoln Stein -=item * Sendu Bala - -=item * Christopher Fields - -=item * Brian Osborne - =back =head2 Previous Bioperl Coordinators: =over +=item * Ewan Birney + =item * Steven Brenner =item * Georg Fuellen @@ -155,8 +159,6 @@ =item * Philip Lijnzaad -=item * Aaron Mackey - =item * Brad Marshall =item * Chad Matsalla From scain at dev.open-bio.org Thu Jul 16 15:36:00 2009 From: scain at dev.open-bio.org (Scott Cain) Date: Thu, 16 Jul 2009 15:36:00 -0400 Subject: [Bioperl-guts-l] [15861] bioperl-live/trunk/Bio: lengthening various name related text fields Message-ID: <200907161936.n6GJa0Qt028199@dev.open-bio.org> Revision: 15861 Author: scain Date: 2009-07-16 15:36:00 -0400 (Thu, 16 Jul 2009) Log Message: ----------- lengthening various name related text fields Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm bioperl-live/trunk/Bio/Root/Build.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2009-07-16 18:23:53 UTC (rev 15860) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2009-07-16 19:36:00 UTC (rev 15861) @@ -254,7 +254,7 @@ locationlist => < < <throw("$load_id doesn't have a primary id") unless defined $parent_id; - my @children = map {$helper->local2global($_)} @$children; # this updates the table that keeps track of parent/child relationships, # but does not update the parent object -- so (start,end) had better be right!!! Modified: bioperl-live/trunk/Bio/Root/Build.pm =================================================================== --- bioperl-live/trunk/Bio/Root/Build.pm 2009-07-16 18:23:53 UTC (rev 15860) +++ bioperl-live/trunk/Bio/Root/Build.pm 2009-07-16 19:36:00 UTC (rev 15861) @@ -156,7 +156,7 @@ closedir($scripts_dir); my $question = $int_ok ? "Install [a]ll BioPerl scripts, [n]one, or choose groups [i]nteractively?" : "Install [a]ll BioPerl scripts or [n]one?"; - my $prompt = $accept ? 'a' : $self->prompt($question, 'a'); + my $prompt = 'a'; #$accept ? 'a' : $self->prompt($question, 'a'); if ($prompt =~ /^[aA]/) { $self->log_info(" - will install all scripts\n"); @@ -492,8 +492,8 @@ my ($self, $desired, $version, $msg) = @_; unless (defined $self->{ask_optional}) { - $self->{ask_optional} = $self->args->{accept} - ? 'n' : $self->prompt("Install [a]ll optional external modules, [n]one, or choose [i]nteractively?", 'n'); + $self->{ask_optional} = 'n'; #$self->args->{accept} + #? 'n' : $self->prompt("Install [a]ll optional external modules, [n]one, or choose [i]nteractively?", 'n'); } return 'skip' if $self->{ask_optional} =~ /^n/i; From scain at dev.open-bio.org Thu Jul 16 15:37:13 2009 From: scain at dev.open-bio.org (Scott Cain) Date: Thu, 16 Jul 2009 15:37:13 -0400 Subject: [Bioperl-guts-l] [15862] bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm: lengthening various name related text fields Message-ID: <200907161937.n6GJbDoB028254@dev.open-bio.org> Revision: 15862 Author: scain Date: 2009-07-16 15:37:13 -0400 (Thu, 16 Jul 2009) Log Message: ----------- lengthening various name related text fields Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm 2009-07-16 19:36:00 UTC (rev 15861) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/Pg.pm 2009-07-16 19:37:13 UTC (rev 15862) @@ -261,7 +261,7 @@ locationlist => < < < Revision: 15863 Author: jhannah Date: 2009-07-16 15:39:09 -0400 (Thu, 16 Jul 2009) Log Message: ----------- [2515] giving this bugzilla ticket a shot Added Paths: ----------- bioperl-live/branches/jhannah/ Copied: bioperl-live/branches/jhannah (from rev 15862, bioperl-live/trunk) From bugzilla-daemon at portal.open-bio.org Thu Jul 16 15:42:29 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 16 Jul 2009 15:42:29 -0400 Subject: [Bioperl-guts-l] [Bug 2515] GenBank XML parser In-Reply-To: Message-ID: <200907161942.n6GJgTeb025484@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2515 jay at jays.net changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |jay at jays.net Status|NEW |ASSIGNED ------- Comment #4 from jay at jays.net 2009-07-16 15:42 EST ------- Starting work in a private branch. svn+ssh://jhannah at dev.open-bio.org/home/svn-repositories/bioperl/bioperl-live/branches/jhannah -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From jhannah at dev.open-bio.org Thu Jul 16 15:44:58 2009 From: jhannah at dev.open-bio.org (Jay Hannah) Date: Thu, 16 Jul 2009 15:44:58 -0400 Subject: [Bioperl-guts-l] [15864] bioperl-live/branches/jhannah/Bio/SeqIO/gbxml.pm: [2515] Adding verbatim from ticket, so I can explicitly see any changes I make later. Message-ID: <200907161944.n6GJiwIC028400@dev.open-bio.org> Revision: 15864 Author: jhannah Date: 2009-07-16 15:44:58 -0400 (Thu, 16 Jul 2009) Log Message: ----------- [2515] Adding verbatim from ticket, so I can explicitly see any changes I make later. Added Paths: ----------- bioperl-live/branches/jhannah/Bio/SeqIO/gbxml.pm Added: bioperl-live/branches/jhannah/Bio/SeqIO/gbxml.pm =================================================================== --- bioperl-live/branches/jhannah/Bio/SeqIO/gbxml.pm (rev 0) +++ bioperl-live/branches/jhannah/Bio/SeqIO/gbxml.pm 2009-07-16 19:44:58 UTC (rev 15864) @@ -0,0 +1,436 @@ +# $Id: gbxml.pm +# +# BioPerl module for Bio::SeqIO::gbxml +# +# Cared for by Ryan Golhar +# NOTE: This module is implemented on an as needed basis. As features +# are needed, they are implemented. Its very bare-bones. +# +# Based off http://www.insdc.org/page.php?page=documents&sid=105a8b52b69db9c36c82a2e0d923ca69 +# +# I tried to follow the genbank module to keep things as consistent as possible +# Right now, I'm not respecting the want_slot parameters. This will need to be added. + +=head1 NAME + +Bio::SeqIO::gbxml - GenBank sequence input/output stream using SAX + +=head1 SYNOPSIS + +It is probably best not to use this object directly, but rather go +through the SeqIO handler system. To read a GenBank XML file: + + $stream = Bio::SeqIO->new( -file => $filename, -format => 'gbxml'); + + while ( my $bioSeqObj = $stream->next_seq() ) { + # do something with $bioSeqObj + } + +To write a Seq object to the current file handle in GenBank XML format: + + $stream->write_seq( -seq => $seqObj); + +If instead you would like a XML::DOM object containing the GBXML, use: + + my $newXmlObject = $stream->to_bsml( -seq => $seqObj); + +=head1 DEPENDENCIES + +In addition to parts of the Bio:: hierarchy, this module uses: + +XML::SAX + +=head1 DESCRIPTION + +This object can transform Bio::Seq objects to and from GenBank XML +flatfiles. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l at bioperl.org - General discussion + http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via the +web: + + http://bugzilla.open-bio.org/ + +=head1 AUTHOR - Ryan Golhar + +Email golharam-at-umdnj-dot-edu + +=cut + +package Bio::SeqIO::gbxml; +use vars qw($Default_Source); +use strict; + +use Bio::SeqIO::FTHelper; +use Bio::SeqFeature::Generic; +use Bio::Species; +use XML::SAX; +use Bio::Seq::SeqFactory; +use Bio::Annotation::Collection; +use Bio::Annotation::Comment; +use Bio::Annotation::Reference; +use Bio::Annotation::DBLink; + +use base qw(Bio::SeqIO XML::SAX::Base); + +$Default_Source = 'GBXML'; + +sub _initialize { + my ($self) = shift; + $self->SUPER::_initialize(@_); + $self->{'_parser'} = XML::SAX::ParserFactory->parser('Handler' => $self); + if( ! defined $self->sequence_factory ) { + $self->sequence_factory(Bio::Seq::SeqFactory->new + (-verbose => $self->verbose(), + -type => 'Bio::Seq::RichSeq')); + } + return; +} + +=head1 METHODS + +=cut + +=head2 next_seq + +Title : next_seq +Usage : my $bioSeqObj = $stream->next_seq +Function: Retrieves the next sequence from a SeqIO::gbxml stream. +Returns : A reference to a Bio::Seq::RichSeq object +Args : + +=cut + +sub next_seq { + my $self = shift; + if( @{$self->{'_seendata'}->{'_seqs'} || []} || eof($self->_fh)) { + return shift @{$self->{'_seendata'}->{'_seqs'}}; + } + $self->{'_parser'}->parse_file($self->_fh); + return shift @{$self->{'_seendata'}->{'_seqs'}}; +} + +# XML::SAX::Base methods + +sub start_document { + my ($self,$doc) = @_; + $self->{'_seendata'} = {'_seqs' => [] #, +# '_authors' => [], +# '_feats' => [] + }; + $self->SUPER::start_document($doc); +} + +sub end_document { + my ($self,$doc) = @_; + $self->SUPER::end_document($doc); +} + + +sub start_element { + my ($self,$ele) = @_; + my $name = uc($ele->{'LocalName'}); + +# my $attr = $ele->{'Attributes'}; +# my $seqid = defined $self->{'_seendata'}->{'_seqs'}->[-1] ? +# $self->{'_seendata'}->{'_seqs'}->[-1]->display_id : undef; + +# for my $k ( keys %$attr ) { +# $attr->{uc $k} = $attr->{$k}; +# delete $attr->{$k}; +# } + + if( $name eq 'GBSET' ) { + + } elsif( $name eq 'GBSEQ' ) { + # Initialize, we are starting a new sequence. + push @{$self->{'_seendata'}->{'_seqs'}}, + $self->sequence_factory->create(); + } elsif( $name eq 'GBFEATURE' ) { + my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1]; + my $fthelper = new Bio::SeqIO::FTHelper(); + $fthelper->verbose($self->verbose()); + push @{$self->{'_seendata'}->{'_feats'}}, $fthelper; + } + +# } elsif( $name eq 'FEATURE-TABLES' ) { +# } elsif( $name eq 'database-xref' ) { +# my ($db,$id) = split(/:/,$content); +# $curseq->annotation->add_Annotation('dblink', +# Bio::Annotation::DBLink->new +# ( -database => $db, +# -primary_id=> $id)); +# } elsif( $name eq 'INTERVAL-LOC' ) { +# my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1]; +# my ($start,$end,$strand) = +# map { $attr->{'{}'.$_}->{'Value'} } qw(STARTPOS +# ENDPOS +# COMPLEMENT); + +# $curfeat->start($start); +# $curfeat->end($end); +# $curfeat->strand(-1) if($strand); +# } elsif( $name eq 'REFERENCE' ) { +# push @{$self->{'_seendata'}->{'_annot'}}, +# Bio::Annotation::Reference->new(); +# } + $self->{'_characters'} = ''; + + push @{$self->{'_state'}}, $name; + $self->SUPER::start_element($ele); +} + +sub end_element { + my ($self,$ele) = @_; + pop @{$self->{'_state'}}; + my $name = uc $ele->{'LocalName'}; + my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1]; + my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1]; + + if ($name eq 'GBSEQ_LOCUS') { + $curseq->display_id($self->{'_characters'}); + + } elsif ($name eq 'GBSEQ_LENGTH' ) { + $curseq->length($self->{'_characters'}); + + } elsif ($name eq 'GBSEQ_MOLTYPE' ) { + if ($self->{'_characters'} =~ /mRNA|dna/) { + $curseq->alphabet('dna'); + } else { + $curseq->alphabet('protein'); + } + $curseq->molecule($self->{'_characters'}); + + } elsif ($name eq 'GBSEQ_TOPOLOGY' ) { + $curseq->is_circular(($self->{'_characters'} =~ /^linear$/i) ? 0 : 1); + + } elsif ($name eq 'GBSEQ_DIVISION' ) { + $curseq->division($self->{'_characters'}); + + } elsif ($name =~ m/GBSEQ_UPDATE-DATE|GBSEQ_CREATE-DATE/ ) { + my $date = $self->{'_characters'}; + # This code was taken from genbank.pm + if($date =~ s/\s*((\d{1,2})-(\w{3})-(\d{2,4})).*/$1/) { + if( length($date) < 11 ) { # improperly formatted date + # But we'll be nice and fix it for them + my ($d,$m,$y) = ($2,$3,$4); + $d = "0$d" if( length($d) == 1 ); + # guess the century here + if( length($y) == 2 ) { + # arbitrarily guess that '60' means 1960 + $y = ($y > 60) ? "19$y" : "20$y"; + $self->warn("Date was malformed, guessing the century for $date to be $y\n"); + } + $date = [join('-',$d,$m,$y)]; + } + $curseq->add_date($date); + } + + } elsif ($name eq 'GBSEQ_DEFINITION' ) { + $curseq->description($self->{'_characters'}); + + } elsif ($name eq 'GBSEQ_PRIMARY-ACCESSION' ) { + $curseq->accession_number($self->{'_characters'}); + + } elsif ($name eq 'GBSEQ_ACCESSION-VERSION' ) { + # also taken from genbank.pm + $self->{'_characters'} =~ m/^\w+\.(\d+)/; + if ($1) { + $curseq->version($1); + $curseq->seq_version($1); + } + + } elsif ($name eq 'GBSEQID' ) { + if ($self->{'_characters'} =~ m/gi\|(\d+)/) { + $curseq->primary_id($1); + } + + } elsif ($name eq 'GBSEQ_SOURCE') { + $self->{'_taxa'}->{'_common'} = $self->{'_characters'}; + + } elsif ($name eq 'GBSEQ_ORGANISM' ) { + # taken from genbank.pm + my @organell_names = ("chloroplast", "mitochondr"); + my @spflds = split(' ', $self->{'_characters'}); + + $_ = $self->{'_characters'}; + if (grep { $_ =~ /^$spflds[0]/i; } @organell_names) { + $self->{'_taxa'}->{'_organelle'} = shift(@spflds); + } + $self->{'_taxa'}->{'_genus'} = shift(@spflds); + $self->{'_taxa'}->{'_species'} = shift(@spflds) if (@spflds); + $self->{'_taxa'}->{'_sub_species'} = shift(@spflds) if (@spflds); + $self->{'_taxa'}->{'_ns_name'} = $self->{'_characters'}; + + } elsif ($name eq 'GBSEQ_TAXONOMY' ) { + # taken from genbank.pm + $_ = $self->{'_characters'}; + my @class; + push (@class, map { s/^\s+//; s/\s+$//; $_; } split /[;\.]+/, $_); + + next unless $self->{'_taxa'}->{'_genus'} and $self->{'_taxa'}->{'_genus'} !~ /^(unknown|None)$/oi; + if ($class[0] eq 'Viruses') { + push( @class, $self->{'_taxa'}->{'_ns_name'} ); + } + elsif ($class[$#class] eq $self->{'_taxa'}->{'_genus'}) { + push( @class, $self->{'_taxa'}->{'_species'} ); + } else { + push( @class, $self->{'_taxa'}->{'_genus'}, $self->{'_taxa'}->{'_species'} ); + } + @class = reverse @class; + + my $make = Bio::Species->new(); + $make->classification( \@class, "FORCE"); + $make->common_name($self->{'_taxa'}->{'_common'}) if $self->{'_taxa'}->{'_common'}; + unless ($class[-1] eq 'Viruses') { + $make->sub_species( $self->{'_taxa'}->{'_sub_species'} ) if $self->{'_taxa'}->{'_sub_species'}; + } + $make->organelle( $self->{'_taxa'}->{'_organelle'} ) if $self->{'_taxa'}->{'_organelle'}; + $curseq->species($make); + delete $self->{'_taxa'}; + + } elsif( $name eq 'GBSEQ_COMMENT' ) { + $curseq->annotation->add_Annotation('comment', Bio::Annotation::Comment->new(-text => $self->{'_characters'} )) if ($self->{'_characters'}); + + } elsif ($name eq 'GBFEATURE_KEY' ) { + $curfeat->key($self->{'_characters'}); + + } elsif ($name eq 'GBFEATURE_LOCATION' ) { + $curfeat->loc($self->{'_characters'}); + + } elsif ($name eq 'GBQUALIFIER_NAME' ) { + $self->{'_feature'}->{"_qualifer_name"} = $self->{'_characters'}; + + } elsif ($name eq 'GBQUALIFIER_VALUE' ) { + my $qualifier = $self->{'_feature'}->{"_qualifer_name"}; + delete $self->{'_feature'}->{"_qualifer_name"}; + + $curfeat->field->{$qualifier} ||= []; + push(@{$curfeat->field->{$qualifier}}, $self->{'_characters'}); + + } elsif ($name eq 'GBSEQ_SEQUENCE' ) { + $curseq->seq($self->{'_characters'}); + + } elsif( $name eq 'GBFEATURE' ) { + shift @{$self->{'_seendata'}->{'_feats'}}; + # copied from genbank.pm @@ Diff output truncated at 10000 characters. @@ From scain at dev.open-bio.org Thu Jul 16 16:00:06 2009 From: scain at dev.open-bio.org (Scott Cain) Date: Thu, 16 Jul 2009 16:00:06 -0400 Subject: [Bioperl-guts-l] [15865] bioperl-live/trunk/Bio/Root/Build.pm: undoing an accidental commit--sorry about that Message-ID: <200907162000.n6GK06qt028422@dev.open-bio.org> Revision: 15865 Author: scain Date: 2009-07-16 16:00:06 -0400 (Thu, 16 Jul 2009) Log Message: ----------- undoing an accidental commit--sorry about that Modified Paths: -------------- bioperl-live/trunk/Bio/Root/Build.pm Modified: bioperl-live/trunk/Bio/Root/Build.pm =================================================================== --- bioperl-live/trunk/Bio/Root/Build.pm 2009-07-16 19:44:58 UTC (rev 15864) +++ bioperl-live/trunk/Bio/Root/Build.pm 2009-07-16 20:00:06 UTC (rev 15865) @@ -156,7 +156,7 @@ closedir($scripts_dir); my $question = $int_ok ? "Install [a]ll BioPerl scripts, [n]one, or choose groups [i]nteractively?" : "Install [a]ll BioPerl scripts or [n]one?"; - my $prompt = 'a'; #$accept ? 'a' : $self->prompt($question, 'a'); + my $prompt = $accept ? 'a' : $self->prompt($question, 'a'); if ($prompt =~ /^[aA]/) { $self->log_info(" - will install all scripts\n"); @@ -492,8 +492,8 @@ my ($self, $desired, $version, $msg) = @_; unless (defined $self->{ask_optional}) { - $self->{ask_optional} = 'n'; #$self->args->{accept} - #? 'n' : $self->prompt("Install [a]ll optional external modules, [n]one, or choose [i]nteractively?", 'n'); + $self->{ask_optional} = $self->args->{accept} + ? 'n' : $self->prompt("Install [a]ll optional external modules, [n]one, or choose [i]nteractively?", 'n'); } return 'skip' if $self->{ask_optional} =~ /^n/i; From jhannah at dev.open-bio.org Thu Jul 16 16:13:26 2009 From: jhannah at dev.open-bio.org (Jay Hannah) Date: Thu, 16 Jul 2009 16:13:26 -0400 Subject: [Bioperl-guts-l] [15866] bioperl-live/branches/jhannah/t: [2515] Trying to get the ball rolling, but already tripping on next_seq(). Message-ID: <200907162013.n6GKDQ5o028509@dev.open-bio.org> Revision: 15866 Author: jhannah Date: 2009-07-16 16:13:26 -0400 (Thu, 16 Jul 2009) Log Message: ----------- [2515] Trying to get the ball rolling, but already tripping on next_seq(). Added Paths: ----------- bioperl-live/branches/jhannah/t/SeqIO/gbxml.t bioperl-live/branches/jhannah/t/data/roa1.gbxml Added: bioperl-live/branches/jhannah/t/SeqIO/gbxml.t =================================================================== --- bioperl-live/branches/jhannah/t/SeqIO/gbxml.t (rev 0) +++ bioperl-live/branches/jhannah/t/SeqIO/gbxml.t 2009-07-16 20:13:26 UTC (rev 15866) @@ -0,0 +1,29 @@ +# -*-Perl-*- Test Harness script for Bioperl +# $Id$ + +use strict; + +BEGIN { + use lib '.'; + use Bio::Root::Test; + + test_begin(-tests => 260); + + use_ok('Bio::SeqIO::genbank'); +} + +my $verbose = test_debug(); + +my $ast = Bio::SeqIO->new(-format => 'gbxml', + -verbose => $verbose, + -file => test_input_file('roa1.gbxml')); +isa_ok($ast, 'Bio::SeqIO'); +$ast->verbose($verbose); +my $as = $ast->next_seq(); +is $as->molecule, 'mRNA',$as->accession_number; +is $as->alphabet, 'dna'; +is($as->primary_id, 3598416); +my @class = $as->species->classification; +is $class[$#class],'Eukaryota'; + + Property changes on: bioperl-live/branches/jhannah/t/SeqIO/gbxml.t ___________________________________________________________________ Name: svn:keywords + Id Added: bioperl-live/branches/jhannah/t/data/roa1.gbxml =================================================================== --- bioperl-live/branches/jhannah/t/data/roa1.gbxml (rev 0) +++ bioperl-live/branches/jhannah/t/data/roa1.gbxml 2009-07-16 20:13:26 UTC (rev 15866) @@ -0,0 +1,208 @@ + + + + + + + + + + + + dbEST + + + 1888424 + + + + + + + + + AI129902 + 1 + + + + + 3598416 + + + + + + + + 3 + 2 + + + + + qc41b07.x1 Soares_pregnant_uterus_NbHPU Homo sapiens cDNA clone IMAGE:1712149 3' similar to SW:ROA1_SCHAM P21522 HETEROGENEOUS NUCLEAR RIBONUCLEOPROTEIN A1, A2/B1 HOMOLOG. ;contains MSR1.b2 MSR1 repetitive element ;. + + + + + + + 1998 + 9 + 10 + + + + + + + + + + + 1998 + 10 + 27 + + + + + + + Contact: Robert Strausberg, Ph.D.~Email: cgapbs-r at mail.nih.gov~This clone is available royalty-free through LLNL ; contact the IMAGE Consortium (info at image.llnl.gov) for further information. ~Trace considered overall poor quality~Insert Length: 525 Std Error: 0.00~Seq primer: -40m13 fwd. ET from Amersham~High quality sequence stop: 1 + + + + + + + Homo sapiens + human + + + taxon + + + 9606 + + + + + + + + + + Homo + sapiens + + + + + + 255 + Organ: uterus; Vector: pT7T3D-PacI; Site_1: Not I; Site_2: Eco RI; 1st strand cDNA was primed with a Not I - oligo(dT) primer [5' AACTGGAAGAATTCGCGGCCGCCTTTTTTTTTTTTTTTTTT 3'], double-stranded cDNA was ligated to Eco RI adaptors (Pharmacia), digested with Not I and cloned into the Not I and Eco RI sites of the modified pT7T3 vector. Library went through one round of normalization. Library constructed by M. Fatima Bonaldo. + + + Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo + 1 + 2 + PRI + + + + + + + 3 + IMAGE:1712149 + + + 11 + Soares_pregnant_uterus_NbHPU + + + 7 + female + + + 12 + adult + + + 16 + DH10B + + + + + + + + + + + + + + Unpublished + + + + + + + + + + NCI-CGAP http://www.ncbi.nlm.nih.gov/ncicgap + + + + + + + + + + + + + + 1997 + + + + + National Cancer Institute, Cancer Genome Anatomy Project (CGAP), Tumor Gene Index + + + + + + + + + + + + + @@ Diff output truncated at 10000 characters. @@ From rbuels at dev.open-bio.org Fri Jul 17 03:58:23 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Fri, 17 Jul 2009 03:58:23 -0400 Subject: [Bioperl-guts-l] [15867] bioperl-live/branches/yapc10hackathon/: removing yapc10hackathon branch, it has run its course Message-ID: <200907170758.n6H7wN6C030165@dev.open-bio.org> Revision: 15867 Author: rbuels Date: 2009-07-17 03:58:23 -0400 (Fri, 17 Jul 2009) Log Message: ----------- removing yapc10hackathon branch, it has run its course Removed Paths: ------------- bioperl-live/branches/yapc10hackathon/ From bugzilla-daemon at portal.open-bio.org Sat Jul 18 16:23:51 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 18 Jul 2009 16:23:51 -0400 Subject: [Bioperl-guts-l] [Bug 2881] New: An .scf file written from a Bio::Seq::Quality object cannot be read again Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2881 Summary: An .scf file written from a Bio::Seq::Quality object cannot be read again Product: BioPerl Version: main-trunk Platform: PC OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Bio::SeqIO AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: asjo at koldfront.dk If you use Bio::SeqIO to write an .scf file of a Bio::Seq::Quality object, traces are synthesized (faked). But if you read that file again with Bio::SeqIO, it fails: * http://thread.gmane.org/gmane.comp.lang.perl.bio.general/20016 The problem is that both Bio::SeqIO::scf::write_seq _and_ Bio::Seq::SequenceTrace::new calls _synthesize_traces(), which happens to make the peak_indices array twice as long as it should be, leaving the resulting .scf file bad. The attached patch removes the call to _synthesize_traces() and set_accuracies() from Bio::SeqIO::scf::write_seq, as both are handled in Bio::Seq::SequenceTrace::new. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sat Jul 18 16:24:39 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 18 Jul 2009 16:24:39 -0400 Subject: [Bioperl-guts-l] [Bug 2881] An .scf file written from a Bio::Seq::Quality object cannot be read again In-Reply-To: Message-ID: <200907182024.n6IKOd3u008181@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2881 ------- Comment #1 from asjo at koldfront.dk 2009-07-18 16:24 EST ------- Created an attachment (id=1343) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1343&action=view) Remove calls to _synthesize_traces() and add_accuracies() from Bio::SeqIO::scf::write_seq -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Sat Jul 18 17:56:14 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 18 Jul 2009 17:56:14 -0400 Subject: [Bioperl-guts-l] [15868] bioperl-live/trunk/Bio/SeqIO/scf.pm: [bug 2881] Message-ID: <200907182156.n6ILuEVE003230@dev.open-bio.org> Revision: 15868 Author: cjfields Date: 2009-07-18 17:56:12 -0400 (Sat, 18 Jul 2009) Log Message: ----------- [bug 2881] * patch courtesy of Adam Sj?\195?\184gren * tests to be added Modified Paths: -------------- bioperl-live/trunk/Bio/SeqIO/scf.pm Modified: bioperl-live/trunk/Bio/SeqIO/scf.pm =================================================================== --- bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-17 07:58:23 UTC (rev 15867) +++ bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-18 21:56:12 UTC (rev 15868) @@ -617,12 +617,9 @@ if (ref($swq) eq "Bio::Seq::Quality") { # this means that the object *has no trace data* # we might as well synthesize some now, ok? - my $swq2 = Bio::Seq::SequenceTrace->new( + $swq = Bio::Seq::SequenceTrace->new( -swq => $swq ); - $swq2->_synthesize_traces(); - $swq2->set_accuracies(); - $swq = $swq2; } } else { From cjfields at dev.open-bio.org Sat Jul 18 18:20:18 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 18 Jul 2009 18:20:18 -0400 Subject: [Bioperl-guts-l] [15869] bioperl-live/trunk/Bio/SeqIO/scf.pm: should have success on successful write_seq() Message-ID: <200907182220.n6IMKI0s003306@dev.open-bio.org> Revision: 15869 Author: cjfields Date: 2009-07-18 18:20:18 -0400 (Sat, 18 Jul 2009) Log Message: ----------- should have success on successful write_seq() Modified Paths: -------------- bioperl-live/trunk/Bio/SeqIO/scf.pm Modified: bioperl-live/trunk/Bio/SeqIO/scf.pm =================================================================== --- bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-18 21:56:12 UTC (rev 15868) +++ bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-18 22:20:18 UTC (rev 15869) @@ -756,6 +756,7 @@ $self->flush if $self->_flush_on_write && defined $self->_fh; $self->close(); + return 1; } From cjfields at dev.open-bio.org Sat Jul 18 18:23:35 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 18 Jul 2009 18:23:35 -0400 Subject: [Bioperl-guts-l] [15870] bioperl-live/trunk/Bio/SeqIO/scf.pm: * fix small pack issue Message-ID: <200907182223.n6IMNZbB003337@dev.open-bio.org> Revision: 15870 Author: cjfields Date: 2009-07-18 18:23:35 -0400 (Sat, 18 Jul 2009) Log Message: ----------- * fix small pack issue Modified Paths: -------------- bioperl-live/trunk/Bio/SeqIO/scf.pm Modified: bioperl-live/trunk/Bio/SeqIO/scf.pm =================================================================== --- bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-18 22:20:18 UTC (rev 15869) +++ bioperl-live/trunk/Bio/SeqIO/scf.pm 2009-07-18 22:23:35 UTC (rev 15870) @@ -906,7 +906,7 @@ $returner->{sequence} = $trace->seq(); $length = scalar(@accuracies); # this really is "c" for samplesize == 2 - $returner->{accuracies}->{binary} = pack "c${length}", at accuracies; + $returner->{accuracies}->{binary} = pack "C${length}", at accuracies; $returner->{accuracies}->{length} = CORE::length($returner->{accuracies}->{binary}); $length = $trace->seq_obj()->length(); From cjfields at dev.open-bio.org Sat Jul 18 18:30:43 2009 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 18 Jul 2009 18:30:43 -0400 Subject: [Bioperl-guts-l] [15871] bioperl-live/trunk/t/SeqIO/scf.t: [bug 2881] Message-ID: <200907182230.n6IMUh0C003378@dev.open-bio.org> Revision: 15871 Author: cjfields Date: 2009-07-18 18:30:42 -0400 (Sat, 18 Jul 2009) Log Message: ----------- [bug 2881] * simple round-trip tests; I noticed that display_id doesn't carry over, so TODO'ing that. qual works though. Modified Paths: -------------- bioperl-live/trunk/t/SeqIO/scf.t Modified: bioperl-live/trunk/t/SeqIO/scf.t =================================================================== --- bioperl-live/trunk/t/SeqIO/scf.t 2009-07-18 22:23:35 UTC (rev 15870) +++ bioperl-live/trunk/t/SeqIO/scf.t 2009-07-18 22:30:42 UTC (rev 15871) @@ -7,7 +7,7 @@ use lib '.'; use Bio::Root::Test; - test_begin(-tests => 59); + test_begin(-tests => 78); use_ok('Bio::SeqIO::scf'); use_ok('Bio::Seq::SequenceTrace'); @@ -44,7 +44,7 @@ my $indexcount = 761; is (scalar(@indices), $indexcount); -use Data::Dumper; +#use Data::Dumper; #---------------------------------------- isa_ok $swq->seq_obj, 'Bio::Seq::Quality'; isa_ok $swq->qual_obj, 'Bio::Seq::Quality'; @@ -253,4 +253,37 @@ $out_scf->write_seq( -target => $v3, -version => 2 ); -# now some version 2 things... +# simple round trip tests (bug 2881) + +my %file_map = ( + # filename # write_seq args + 'chad100.scf' => 1, + '13-pilE-F.scf' => 1, + 'version2.scf' => 1, + 'version3.scf' => 1 + ); + +for my $f (sort keys %file_map) { + my $outfile = test_output_file(); + my $in = Bio::SeqIO->new(-file => test_input_file($f), + -format => 'scf'); + my $out = Bio::SeqIO->new(-file => ">$outfile", + -format => 'scf'); + + my $seq1 = $in->next_seq(); + isa_ok($seq1, 'Bio::Seq::SequenceTrace'); + + ok($out->write_seq(-target => $seq1)); + + my $in2 = Bio::SeqIO->new(-file => "<$outfile", + -format => 'scf'); + my $seq2 = $in2->next_seq(); + isa_ok($seq2, 'Bio::Seq::SequenceTrace'); + if ($seq1->display_id) { + TODO: { + local $TODO = "display_id doesn't round trip yet"; + is($seq1->display_id, $seq2->display_id, 'display_id matches'); + } + } + is_deeply($seq1->qual, $seq2->qual, 'qual scores match'); +} \ No newline at end of file From bugzilla-daemon at portal.open-bio.org Sat Jul 18 18:32:16 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 18 Jul 2009 18:32:16 -0400 Subject: [Bioperl-guts-l] [Bug 2881] An .scf file written from a Bio::Seq::Quality object cannot be read again In-Reply-To: Message-ID: <200907182232.n6IMWG5i011383@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2881 ------- Comment #2 from cjfields at bioperl.org 2009-07-18 18:32 EST ------- Committed patch and tests in r15871. I noticed that display_id isn't carried over (and I'm unsure about other data). Might be worth checking out, so I'll leave open for now. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sat Jul 18 18:57:07 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 18 Jul 2009 18:57:07 -0400 Subject: [Bioperl-guts-l] [Bug 2881] An .scf file written from a Bio::Seq::Quality object cannot be read again In-Reply-To: Message-ID: <200907182257.n6IMv7iU011939@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2881 ------- Comment #3 from asjo at koldfront.dk 2009-07-18 18:57 EST ------- (In reply to comment #2) > Committed patch and tests in r15871. Cool, thanks! And nice with the added tests, although I don't think any of them exercises the codepath that my patch touches, as it is only followed when you write_seq() a Bio::Seq::Quality object, and I think you get Bio::Seq::SequenceTrace objects back when you read an .scf file? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sat Jul 18 19:39:03 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 18 Jul 2009 19:39:03 -0400 Subject: [Bioperl-guts-l] [Bug 2881] An .scf file written from a Bio::Seq::Quality object cannot be read again In-Reply-To: Message-ID: <200907182339.n6INd3C9012980@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2881 ------- Comment #4 from asjo at koldfront.dk 2009-07-18 19:39 EST ------- Created an attachment (id=1344) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1344&action=view) Test scf-roundtrip when synthesizing traces Here's a patch that adds two tests of the synthesize trace-code path. Both fail without r15868 - the second dies. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From mcook at dev.open-bio.org Tue Jul 21 12:48:46 2009 From: mcook at dev.open-bio.org (Malcom Cook) Date: Tue, 21 Jul 2009 12:48:46 -0400 Subject: [Bioperl-guts-l] [15872] bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t: initial commit Message-ID: <200907211648.n6LGmkMs013063@dev.open-bio.org> Revision: 15872 Author: mcook Date: 2009-07-21 12:48:45 -0400 (Tue, 21 Jul 2009) Log Message: ----------- initial commit Added Paths: ----------- bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t Added: bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t =================================================================== --- bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t (rev 0) +++ bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t 2009-07-21 16:48:45 UTC (rev 15872) @@ -0,0 +1,75 @@ +# -*-Perl-*- Test Harness script for Bioperl +# $Id: $ + +# malcolm_cook at stowers.org: this test is in a separate file from +# RemoteBlast.t (on which it is modelled) since there is some sort of +# side-effecting between the multiple remote blasts that is causing +# this test to fail, if it comes last, or the other test to fail, if +# this one comes first. THIS IS A BUG EITHER IN REMOTE BLAST OR MY +# UNDERSTANDING, i.e. of how to initialize it. + +use strict; + +BEGIN { + use lib '.'; + use Bio::Root::Test; + + test_begin(-tests => 3, + -requires_modules => [qw(IO::String LWP LWP::UserAgent)], + -requires_networking => 1); + + use_ok('Bio::Tools::Run::RemoteBlast'); +} + +my $v = test_debug(); +my $inputfilename = test_input_file('ecolitst.fa'); +ok( -e $inputfilename); + +my $remote_rpsblast = Bio::Tools::Run::RemoteBlast->new + ('-verbose' => test_debug(), + '-prog' => 'blastp', + '-data' => 'cdsearch/cdd', + '-readmethod' => 'blasttable', + '-expect' => '1e-10', + ); + +$remote_rpsblast->retrieve_parameter('ALIGNMENT_VIEW', 'Tabular'); + +# This is the key to getting job run using rpsblast: +$Bio::Tools::Run::RemoteBlast::HEADER{'SERVICE'} = 'rpsblast'; + +ok($remote_rpsblast->submit_blast($inputfilename),'rpsblast blasttable submitted'); + +print STDERR "waiting..." if( $v > 0 ); +while ( my @rids = $remote_rpsblast->each_rid ) { + foreach my $rid ( @rids ) { + my $rc = $remote_rpsblast->retrieve_blast($rid); + if ( !ref($rc) ) { + if ( $rc < 0 ) { + die "need a better solution for when 'Server failed to return any data'"; + } + $remote_rpsblast->remove_rid($rid); + print STDERR "." if ( $v > 0 ); + sleep 5; + } else { + ok(1,'retrieve_blast succeeded'); + $remote_rpsblast->remove_rid($rid); + my $count = 0; + while (my $result = $rc->next_result) { + while ( my $hit = $result->next_hit ) { + $count++; + next unless ( $v > 0); + print "sbjct name is ", $hit->name, "\n"; + while ( my $hsp = $hit->next_hsp ) { + print "score is ", $hsp->bits, "\n"; + } + } + } + is($count, 44, 'correct result count'); # of course, this could change whenever CDD changes + } + } +} + +# To be a good citizen, we should restore the default NCBI service +# ('plain') for future tests +$Bio::Tools::Run::RemoteBlast::HEADER{'SERVICE'} = 'plain'; From mcook at dev.open-bio.org Tue Jul 21 12:51:32 2009 From: mcook at dev.open-bio.org (Malcom Cook) Date: Tue, 21 Jul 2009 12:51:32 -0400 Subject: [Bioperl-guts-l] [15873] bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t: fixed svn header Message-ID: <200907211651.n6LGpWQp013263@dev.open-bio.org> Revision: 15873 Author: mcook Date: 2009-07-21 12:51:32 -0400 (Tue, 21 Jul 2009) Log Message: ----------- fixed svn header Modified Paths: -------------- bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t Modified: bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t =================================================================== --- bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t 2009-07-21 16:48:45 UTC (rev 15872) +++ bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t 2009-07-21 16:51:32 UTC (rev 15873) @@ -1,5 +1,5 @@ # -*-Perl-*- Test Harness script for Bioperl -# $Id: $ +# $Id$ # malcolm_cook at stowers.org: this test is in a separate file from # RemoteBlast.t (on which it is modelled) since there is some sort of From mcook at dev.open-bio.org Tue Jul 21 12:57:54 2009 From: mcook at dev.open-bio.org (Malcom Cook) Date: Tue, 21 Jul 2009 12:57:54 -0400 Subject: [Bioperl-guts-l] [15874] bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t: getting propset to work for ID Message-ID: <200907211657.n6LGvsfV013337@dev.open-bio.org> Revision: 15874 Author: mcook Date: 2009-07-21 12:57:54 -0400 (Tue, 21 Jul 2009) Log Message: ----------- getting propset to work for ID Property Changed: ---------------- bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t Property changes on: bioperl-live/trunk/t/Tools/Run/RemoteBlast_rpsblast.t ___________________________________________________________________ Name: svn:keywords + Author Date Id Rev URL From chmille4 at dev.open-bio.org Tue Jul 21 15:20:00 2009 From: chmille4 at dev.open-bio.org (Chase Miller) Date: Tue, 21 Jul 2009 15:20:00 -0400 Subject: [Bioperl-guts-l] [15875] bioperl-dev/trunk/Bio: Fixed some bugs for the handling of linking taxa data to matrices and trees Message-ID: <200907211920.n6LJK0ap014755@dev.open-bio.org> Revision: 15875 Author: chmille4 Date: 2009-07-21 15:20:00 -0400 (Tue, 21 Jul 2009) Log Message: ----------- Fixed some bugs for the handling of linking taxa data to matrices and trees Modified Paths: -------------- bioperl-dev/trunk/Bio/Nexml/Util.pm bioperl-dev/trunk/Bio/Nexml.pm Modified: bioperl-dev/trunk/Bio/Nexml/Util.pm =================================================================== --- bioperl-dev/trunk/Bio/Nexml/Util.pm 2009-07-21 16:57:54 UTC (rev 15874) +++ bioperl-dev/trunk/Bio/Nexml/Util.pm 2009-07-21 19:20:00 UTC (rev 15875) @@ -157,14 +157,14 @@ sub _make_tree { my($self, $proj) = @_; my @trees; - my $taxa = $proj->get_taxa(); + #my $taxa = $proj->get_taxa(); my $forests = $proj->get_forests(); foreach my $forest (@$forests) { - my $basename = $forest->get_name(); - my $trees = $forest->get_entities(); - + my $basename = $forest->get_name(); + my $taxa = $forest->get_taxa(); + my $trees = $forest->get_entities(); foreach my $t (@$trees) { @@ -195,7 +195,7 @@ #transfer attributes that apply to all nodes #check if taxa data exists for the current node ($terminal) - my $taxa_ents = $taxa->[0]->get_entities(); + my $taxa_ents = $taxa->get_entities(); foreach my $taxon (@$taxa_ents) { if($taxon eq $terminal->get_taxon()) { @@ -392,12 +392,10 @@ my @feats = $aln->get_all_SeqFeatures(); my $taxa = $factory->create_taxa(); for my $seq ( @seqs ) { - #create taxa - + #create datum linked to taxa my $datum = create_bphylo_datum($seq, \@feats, $taxa, '-type_object' => $to); $matrix->insert($datum); } - #$self->_print($matrix->to_xml()); return $matrix, $taxa; } else { @@ -409,7 +407,6 @@ my ($self, $seq, @args) = @_; my $type = $seq->alphabet || $seq->_guess_alphabet || 'dna'; $type = uc($type); - #my $dat = $fac->create_datum( '-type' => $type); my @feats = $seq->get_all_SeqFeatures(); my $taxa = $fac->create_taxa(); @@ -449,11 +446,7 @@ my $matrix = $fac->create_matrix(-type => $type); $matrix->set_name($seq->display_name()); - print $dat->to_xml(); $matrix->insert($dat); - #my $proj = $fac->create_project(); - #$proj->insert($matrix); - return $matrix, $taxa; } @@ -508,7 +501,7 @@ my $taxon_name = ($feat->get_tag_values('taxon'))[0]; $taxon = $fac->create_taxon(-name => $taxon_name); $taxa->insert($taxon); - $self->set_taxon($taxa->get_by_name($taxon_name)); + $self->set_taxon($taxa->get_by_name($taxon_name)); #think i can change this to just set_taxon($taxon) } } Modified: bioperl-dev/trunk/Bio/Nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/Nexml.pm 2009-07-21 16:57:54 UTC (rev 15874) +++ bioperl-dev/trunk/Bio/Nexml.pm 2009-07-21 19:20:00 UTC (rev 15875) @@ -109,7 +109,6 @@ my %params = @args; my $file_string = $params{'-file'}; - $self->{'_seqIO'} = Bio::SeqIO::nexml->new(@args); $self->{'_alnIO'} = Bio::AlignIO::nexml->new(@args); $self->{'_treeIO'} = Bio::TreeIO::nexml->new(@args); @@ -198,28 +197,25 @@ my $ent; my $taxa_o; my $phylo_tree_o; - my $first_taxa; foreach my $tree (@$trees) { ($phylo_tree_o, $taxa_o) = Bio::Nexml::Util->create_bphylo_tree($tree); - #check if taxa exists - if (!$first_taxa) { - $first_taxa = $taxa_o; - } - link_taxa($self, $taxa_o, $forest, \@taxas, $first_taxa); + link_taxa($self, $taxa_o, $forest, \@taxas); + $forest->insert($phylo_tree_o); } - #converts matrices to Bio::Phylo objects + #convert matrices to Bio::Phylo objects my $matrices = Bio::Phylo::Matrices->new(); my ($phylo_matrix_o, @matrix_taxas); foreach my $aln (@$alns) { ($phylo_matrix_o, $taxa_o) = Bio::Nexml::Util->create_bphylo_aln($aln); - #check if taxa exists - link_taxa($self, $taxa_o, $phylo_matrix_o, \@matrix_taxas, $first_taxa); + + #link_taxa and check for already existing identical taxa + link_taxa($self, $taxa_o, $phylo_matrix_o, \@matrix_taxas); $matrices->insert($phylo_matrix_o); } @@ -245,44 +241,77 @@ sub link_taxa { - my ($self, $taxa_o, $phylo_cont_o, $taxas, $first_taxa) = @_; + my ($self, $taxa_o, $phylo_cont_o, $taxas) = @_; + + my $duplicate_taxa; + my $new_taxa_ents = $taxa_o->get_entities(); + #test if taxa_o is already present + foreach my $taxa (@$taxas) + { + my $taxa_ents = $taxa->get_entities; + my $new_num_taxa = @$new_taxa_ents; + my $num_taxa = @$taxa_ents; + + #check if the taxa have same number of elements + if($new_num_taxa != $num_taxa) {next;} + + my %taxa_o = map {($_)->get_name(), 1} @$taxa_ents; + my @difference = grep {!$taxa_o {($_)->get_name()}} @$new_taxa_ents; + + if (!@difference) { + $duplicate_taxa = $taxa; + last; + } + } + if (!$duplicate_taxa) { + push @$taxas, $taxa_o; + $phylo_cont_o->set_taxa($taxa_o); + } + else #TODO make this work for multiple forests with different taxa + { + if ($phylo_cont_o->isa('Bio::Phylo::Matrices::Matrix')) { + $phylo_cont_o->set_taxa($taxa_o); + } - if ($taxa_o->first() && !exists $taxas->[ get_taxa_labels($taxa_o) ]) { - $phylo_cont_o->set_taxa($taxa_o); - push @$taxas, get_taxa_labels($taxa_o); - } - else #TODO make this work for multiple forests with different taxa + my $present_taxa_ents = $duplicate_taxa->get_entities(); + my %present_taxa_ents = map {($_)->get_name, $_} @$present_taxa_ents; + + foreach my $new_taxa_ent (@$new_taxa_ents) { - my $ents = $taxa_o->get_entities(); - - my $main_taxa = $first_taxa->get_entities(); - for (my $i = 0; $i < @$ents; $i++) - { - my $new_label = $ents->[$i]->get_name(); - my $old_label = $main_taxa->[$i]->get_name(); - if( $new_label != $old_label) { - $self->throw("taxa conversion error - taxa not identical"); + my $new_label = $new_taxa_ent->get_name(); + #If tree get nodes and change taxa to point to already present ($duplicated_taxa) taxa + if($phylo_cont_o->isa('Bio::Phylo::Forest')) { + my $nodes = $new_taxa_ent->get_nodes(); + foreach my $node (@$nodes) + { + $new_taxa_ent->unset_node($node); + $present_taxa_ents{$new_label}->set_nodes($node); } - my $id = $ents->[$i]->get_xml_id(); - my $mid = $main_taxa->[$i]->get_xml_id(); - $ents->[$i]->set_xml_id($mid); } - } -} - -sub get_taxa_labels -{ - my $taxa = shift(@_); - my $ents = $taxa->get_entities(); - - my $label_str = undef; - - foreach my $ent (@$ents) - { - $label_str .= $ent->get_name(); + #If matrix get data and change the taxa to point to already present ($duplicated_taxa) taxa + elsif($phylo_cont_o->isa('Bio::Phylo::Matrices::Matrix')) { + my $data = $new_taxa_ent->get_data(); + foreach my $datum (@$data) + { + $new_taxa_ent->unset_datum($datum); + $present_taxa_ents{$new_label}->set_data($datum); + } + $phylo_cont_o->set_taxa($duplicate_taxa); + } + else { + $self->throw("Object container must be either Forest or Matrix"); + } + my $xml_id = $present_taxa_ents{$new_label}; + if( !$xml_id) { + $self->throw("taxa conversion error - taxa not identical"); + } + } } - return $label_str; } +<<<<<<< .mine + +1;======= 1; +>>>>>>> .r15874 From chmille4 at dev.open-bio.org Tue Jul 21 15:24:39 2009 From: chmille4 at dev.open-bio.org (Chase Miller) Date: Tue, 21 Jul 2009 15:24:39 -0400 Subject: [Bioperl-guts-l] [15876] bioperl-dev/trunk/Bio/Nexml.pm: Fixed some bugs for the handling of linking taxa data to matrices and trees Message-ID: <200907211924.n6LJOdQa014906@dev.open-bio.org> Revision: 15876 Author: chmille4 Date: 2009-07-21 15:24:39 -0400 (Tue, 21 Jul 2009) Log Message: ----------- Fixed some bugs for the handling of linking taxa data to matrices and trees Modified Paths: -------------- bioperl-dev/trunk/Bio/Nexml.pm Modified: bioperl-dev/trunk/Bio/Nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/Nexml.pm 2009-07-21 19:20:00 UTC (rev 15875) +++ bioperl-dev/trunk/Bio/Nexml.pm 2009-07-21 19:24:39 UTC (rev 15876) @@ -310,8 +310,4 @@ } } -<<<<<<< .mine - -1;======= 1; ->>>>>>> .r15874 From maj at dev.open-bio.org Tue Jul 21 17:09:44 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Tue, 21 Jul 2009 17:09:44 -0400 Subject: [Bioperl-guts-l] [15877] bioperl-dev/branches/: branches for the dev Message-ID: <200907212109.n6LL9ic3015395@dev.open-bio.org> Revision: 15877 Author: maj Date: 2009-07-21 17:09:43 -0400 (Tue, 21 Jul 2009) Log Message: ----------- branches for the dev Added Paths: ----------- bioperl-dev/branches/ From maj at dev.open-bio.org Tue Jul 21 17:13:37 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Tue, 21 Jul 2009 17:13:37 -0400 Subject: [Bioperl-guts-l] [15878] bioperl-dev/branches/maj-nexml/: "fork" Chase's stuff and fiddle Message-ID: <200907212113.n6LLDbRL015466@dev.open-bio.org> Revision: 15878 Author: maj Date: 2009-07-21 17:13:37 -0400 (Tue, 21 Jul 2009) Log Message: ----------- "fork" Chase's stuff and fiddle Added Paths: ----------- bioperl-dev/branches/maj-nexml/ Copied: bioperl-dev/branches/maj-nexml (from rev 15877, bioperl-dev/trunk) From maj at dev.open-bio.org Tue Jul 21 18:17:22 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Tue, 21 Jul 2009 18:17:22 -0400 Subject: [Bioperl-guts-l] [15879] bioperl-dev/branches/maj-nexml/t: refactor to use Bio::Root::Test Message-ID: <200907212217.n6LMHMr1015583@dev.open-bio.org> Revision: 15879 Author: maj Date: 2009-07-21 18:17:22 -0400 (Tue, 21 Jul 2009) Log Message: ----------- refactor to use Bio::Root::Test some detabification Modified Paths: -------------- bioperl-dev/branches/maj-nexml/t/AlignIO/nexml.t bioperl-dev/branches/maj-nexml/t/SeqIO/nexml.t bioperl-dev/branches/maj-nexml/t/TreeIO/nexml.t Modified: bioperl-dev/branches/maj-nexml/t/AlignIO/nexml.t =================================================================== --- bioperl-dev/branches/maj-nexml/t/AlignIO/nexml.t 2009-07-21 21:13:37 UTC (rev 15878) +++ bioperl-dev/branches/maj-nexml/t/AlignIO/nexml.t 2009-07-21 22:17:22 UTC (rev 15879) @@ -1,52 +1,61 @@ +#-*-perl-*- +# $Id$ use strict; -use lib '../..'; -use Bio::AlignIO::Nexml; -use Test::More tests=> 1000; +chdir('../..'); # hack to allow run from t +use lib '.'; +use lib 't/lib'; +#use Test::More tests=> 1000; +use Bio::Root::Test; + +test_begin( -tests => 1000 ); + use_ok('Bio::AlignIO::nexml'); # checks that your module is there and loads ok - # this passes if $object gets defined without throws by the constructor - # use when droppeded into bioperl - ok( my $inAlnStream = Bio::AlignIO->new(-file => test_input_file("../../code/data_sets/characters.nexml.xml"), -format => 'nexml')); +# this passes if $object gets defined without throws by the constructor +# use when droppeded into bioperl + ok( my $inAlnStream = Bio::AlignIO->new(-file => test_input_file("characters.nexml.xml"), -format => 'nexml'), 'make stream'); - ok( my $aln_obj = $inAlnStream->next_aln() ); - isa_ok($aln_obj, 'Bio::SimpleAlign'); - is ($aln_obj->id, 'DNA sequences', "id"); + ok( my $aln_obj = $inAlnStream->next_aln(), 'nexml matrix to aln' ); + isa_ok($aln_obj, 'Bio::SimpleAlign', 'obj ok'); + is ($aln_obj->id, 'DNA sequences', 'aln id'); my $num =0; my @expected_seqs = ('ACGCTCGCATCGCATC', 'ACGCTCGCATCGCATT', 'ACGCTCGCATCGCATG'); #checking sequence objects foreach my $seq_obj ($aln_obj->each_seq()) { $num++; - is( $seq_obj->alphabet, 'dna', "alphabet" ); - is( $seq_obj->display_id, "DNA sequences.row_$num", "display_id"); - is( $seq_obj->seq, $expected_seqs[$num-1], "sequence"); + is( $seq_obj->alphabet, 'dna', "alphabet" ); + is( $seq_obj->display_id, "DNA sequences.row_$num", "display_id"); + is( $seq_obj->seq, $expected_seqs[$num-1], "sequence correct"); } #tests for writing nexml alignments - ok( my $outAlnStream = Bio::AlignIO->new(-file => test_output_data('>../../code/data_sets/charactersOut.xml'), -format => 'nexml'), 'Begin Tests for writing files');; +diag('Begin tests for write/read roundtrip'); +my $outdata = test_output_file(); + ok( my $outAlnStream = Bio::AlignIO->new(-file =>$outdata, -format => 'nexml'), 'out stream ok');; - ok( $outAlnStream->write_aln($aln_obj)); + ok( $outAlnStream->write_aln($aln_obj), 'write nexml'); +close($outdata); + ok( my $inAlnStream2 = Bio::AlignIO->new(-file => $outdata, -format => 'nexml'), 'reopen');; - ok( my $inAlnStream2 = Bio::AlignIO->new(-file => test_input_data('../../code/data_sets/charactersOut.xml'), -format => 'nexml'), 'Begin Tests for writing files');; - - ok( my $aln_obj2 = $inAlnStream2->next_aln() ); - isa_ok($aln_obj2, 'Bio::SimpleAlign'); - is ($aln_obj2->id, 'DNA sequences', "id"); + ok( my $aln_obj2 = $inAlnStream2->next_aln(),'get aln (rt)' ); + isa_ok($aln_obj2, 'Bio::SimpleAlign', 'aln obj (rt)'); + is ($aln_obj2->id, 'DNA sequences', "aln id (rt)"); $num =0; @expected_seqs = ('ACGCTCGCATCGCATC', 'ACGCTCGCATCGCATT', 'ACGCTCGCATCGCATG'); #checking sequence objects foreach my $seq_obj ($aln_obj2->each_seq()) { $num++; - is( $seq_obj->alphabet, 'dna', "alphabet" ); - is( $seq_obj->display_id, "DNA sequences.row_$num", "display_id"); - is( $seq_obj->seq, $expected_seqs[$num-1], "sequence"); - } \ No newline at end of file + is( $seq_obj->alphabet, 'dna', "alphabet (rt)" ); + is( $seq_obj->display_id, "DNA sequences.row_$num", "display_id (rt)"); + is( $seq_obj->seq, $expected_seqs[$num-1], "sequence (rt)"); + } Modified: bioperl-dev/branches/maj-nexml/t/SeqIO/nexml.t =================================================================== --- bioperl-dev/branches/maj-nexml/t/SeqIO/nexml.t 2009-07-21 21:13:37 UTC (rev 15878) +++ bioperl-dev/branches/maj-nexml/t/SeqIO/nexml.t 2009-07-21 22:17:22 UTC (rev 15879) @@ -1,35 +1,38 @@ +#-*-perl-*- +# $Id$ + use strict; +chdir('../..'); # hack to allow run from t + use lib '../..'; -use Bio::PrimarySeq; -use Bio::SeqIO::Nexml; -use Test::More tests=> 1000; +# use Test::More tests=> 1000; +use Bio::Root::Test; +test_begin( -tests=>1000 ); - +use_ok( 'Bio::PrimarySeq' ); +#use_ok( 'Bio::SeqIO::Nexml' ); use_ok('Bio::SeqIO::nexml'); # checks that your module is there and loads ok # this passes if $object gets defined without throws by the constructor # use when droppeded into bioperl - ok( my $SeqStream = Bio::SeqIO->new(-file => test_input_file("../../code/data_sets/characters.nexml.xml"), -format => 'nexml')); - + ok( my $SeqStream = Bio::SeqIO->new(-file => test_input_file("characters.nexml.xml"), -format => 'nexml'), 'stream ok'); - - #checking first sequence object - ok( my $seq_obj = $SeqStream->next_seq() ); + ok( my $seq_obj = $SeqStream->next_seq(), 'seq obj' ); isa_ok($seq_obj, 'Bio::Seq'); - is( $seq_obj->alphabet, 'dna', "alphabet" ); - is( $seq_obj->primary_id, 'DNA sequences.seq_1', "primary_id"); - is( $seq_obj->display_id, 'dna_seq_1', "display_id"); - is( $seq_obj->seq, 'ACGCTCGCATCGCATC', "sequence"); + is( $seq_obj->alphabet, 'dna', "alphabet" ); + is( $seq_obj->primary_id, 'DNA sequences.seq_1', "primary_id"); + is( $seq_obj->display_id, 'dna_seq_1', "display_id"); + is( $seq_obj->seq, 'ACGCTCGCATCGCATC', "sequence"); #checking second sequence object ok( $seq_obj = $SeqStream->next_seq() ); - is( $seq_obj->alphabet, 'dna', "alphabet" ); - is( $seq_obj->primary_id, 'DNA sequences.seq_2', "primary_id"); - is( $seq_obj->display_id, 'dna_seq_2', "display_id"); - is( $seq_obj->seq, 'ACGCTCGCATCGCATT', "sequence"); + is( $seq_obj->alphabet, 'dna', "alphabet" ); + is( $seq_obj->primary_id, 'DNA sequences.seq_2', "primary_id"); + is( $seq_obj->display_id, 'dna_seq_2', "display_id"); + is( $seq_obj->seq, 'ACGCTCGCATCGCATT', "sequence"); $SeqStream->next_seq(); $SeqStream->next_seq(); @@ -37,45 +40,45 @@ #checking fifth sequence object ok( $seq_obj = $SeqStream->next_seq() ); - is( $seq_obj->alphabet, 'rna', "alphabet" ); - is( $seq_obj->primary_id, 'RNA sequences.seq_2', "primary_id"); - is( $seq_obj->display_id, 'RNA sequences.seq_2', "display_id defaults to primary"); - is( $seq_obj->seq, 'ACGCUCGCAUCGCAUC', "sequence"); + is( $seq_obj->alphabet, 'rna', "alphabet" ); + is( $seq_obj->primary_id, 'RNA sequences.seq_2', "primary_id"); + is( $seq_obj->display_id, 'RNA sequences.seq_2', "display_id defaults to primary"); + is( $seq_obj->seq, 'ACGCUCGCAUCGCAUC', "sequence"); #Start tests for writing to a file +diag('Begin tests for writing seq files'); +my $outdata = test_output_file(); + ok( my $outSeqStream = Bio::SeqIO->new(-file => $outdata, -format => 'nexml'), 'out stream ok'); + ok( $outSeqStream->write_seq($seq_obj), 'write nexml seq'); +close($outdata); + my $inSeqStream = Bio::SeqIO->new(-file => $outdata, -format => 'nexml'); - ok( my $outSeqStream = Bio::SeqIO->new(-file => test_output_data('>../../code/data_sets/charactersSeqsOut.xml'), -format => 'nexml'), 'Begin Tests for writing tree files');; - ok( $outSeqStream->write_seq($seq_obj)); +TODO : { + local $TODO = "not done yet"; + #checking first seq object + ok($seq_obj = $inSeqStream->next_seq() ); + + isa_ok($seq_obj, 'Bio::Seq'); + is( $seq_obj->alphabet, 'dna', "alphabet" ); + is( $seq_obj->primary_id, 'DNA sequences.seq_1', "primary_id"); + is( $seq_obj->display_id, 'dna_seq_1', "display_id"); + is( $seq_obj->seq, 'ACGCTCGCATCGCATC', "sequence"); + + #checking second sequence object + ok( $seq_obj = $SeqStream->next_seq() ); + is( $seq_obj->alphabet, 'dna', "alphabet" ); + is( $seq_obj->primary_id, 'DNA sequences.seq_2', "primary_id"); + is( $seq_obj->display_id, 'dna_seq_2', "display_id"); + is( $seq_obj->seq, 'ACGCTCGCATCGCATT', "sequence"); - my $inSeqStream = Bio::SeqIO->new(-file => test_input_data('../../code/data_sets/charactersSeqsOut.xml'), -format => 'nexml'); - - #TODO when writing multiple seqs works this will be useful -=head not done yet - #checking first tree object - ok($seq_obj = $inSeqStream->next_seq() ); - - isa_ok($seq_obj, 'Bio::Seq'); - is( $seq_obj->alphabet, 'dna', "alphabet" ); - is( $seq_obj->primary_id, 'DNA sequences.seq_1', "primary_id"); - is( $seq_obj->display_id, 'dna_seq_1', "display_id"); - is( $seq_obj->seq, 'ACGCTCGCATCGCATC', "sequence"); - - #checking second sequence object - ok( $seq_obj = $SeqStream->next_seq() ); - is( $seq_obj->alphabet, 'dna', "alphabet" ); - is( $seq_obj->primary_id, 'DNA sequences.seq_2', "primary_id"); - is( $seq_obj->display_id, 'dna_seq_2', "display_id"); - is( $seq_obj->seq, 'ACGCTCGCATCGCATT', "sequence"); - $SeqStream->next_seq(); $SeqStream->next_seq(); -=cut #checking fifth sequence object ok( $seq_obj = $inSeqStream->next_seq() ); - is( $seq_obj->alphabet, 'rna', "alphabet" ); - is( $seq_obj->primary_id, 'RNA sequences.seq_2.seq_1', "primary_id"); - is( $seq_obj->display_id, 'RNA sequences.seq_2.seq_1', "display_id defaults to primary"); - is( $seq_obj->seq, 'ACGCUCGCAUCGCAUC', "sequence"); - \ No newline at end of file + is( $seq_obj->alphabet, 'rna', "alphabet" ); + is( $seq_obj->primary_id, 'RNA sequences.seq_2.seq_1', "primary_id"); + is( $seq_obj->display_id, 'RNA sequences.seq_2.seq_1', "display_id defaults to primary"); + is( $seq_obj->seq, 'ACGCUCGCAUCGCAUC', "sequence"); +}; Modified: bioperl-dev/branches/maj-nexml/t/TreeIO/nexml.t =================================================================== --- bioperl-dev/branches/maj-nexml/t/TreeIO/nexml.t 2009-07-21 21:13:37 UTC (rev 15878) +++ bioperl-dev/branches/maj-nexml/t/TreeIO/nexml.t 2009-07-21 22:17:22 UTC (rev 15879) @@ -1,31 +1,32 @@ +#-*-perl-*- +# $Id$ + use strict; +chdir ('../..'); # hack to allow run from t dir use lib '../..'; -use Bio::Tree::Tree; -use Bio::TreeIO; -use Test::More tests=> 1000; use Bio::Root::Test; +test_begin( -tests=>1000 ); +use_ok( 'Bio::Tree::Tree' ); +use_ok( 'Bio::TreeIO' ); @@ Diff output truncated at 10000 characters. @@ From rbuels at dev.open-bio.org Tue Jul 21 18:19:05 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Tue, 21 Jul 2009 18:19:05 -0400 Subject: [Bioperl-guts-l] [15880] bioperl-live/trunk/t/Seq/PrimarySeq.t: perltidy on t/Seq/PrimarySeq .t, prep for adding some tests for version() accessor Message-ID: <200907212219.n6LMJ5Qb015614@dev.open-bio.org> Revision: 15880 Author: rbuels Date: 2009-07-21 18:19:04 -0400 (Tue, 21 Jul 2009) Log Message: ----------- perltidy on t/Seq/PrimarySeq.t, prep for adding some tests for version() accessor Modified Paths: -------------- bioperl-live/trunk/t/Seq/PrimarySeq.t Modified: bioperl-live/trunk/t/Seq/PrimarySeq.t =================================================================== --- bioperl-live/trunk/t/Seq/PrimarySeq.t 2009-07-21 22:17:22 UTC (rev 15879) +++ bioperl-live/trunk/t/Seq/PrimarySeq.t 2009-07-21 22:19:04 UTC (rev 15880) @@ -3,12 +3,12 @@ use strict; -BEGIN { - use lib '.'; +BEGIN { + use lib '.'; use Bio::Root::Test; - - test_begin(-tests => 54); - + + test_begin( -tests => 54 ); + use_ok('Bio::PrimarySeq'); use_ok('Bio::Location::Simple'); use_ok('Bio::Location::Fuzzy'); @@ -16,175 +16,195 @@ } my $seq = Bio::PrimarySeq->new( - '-seq' => 'TTGGTGGCGTCAACT', - '-display_id' => 'new-id', - '-alphabet' => 'dna', - '-accession_number' => 'X677667', - '-desc' => 'Sample Bio::Seq object'); + '-seq' => 'TTGGTGGCGTCAACT', + '-display_id' => 'new-id', + '-alphabet' => 'dna', + '-accession_number' => 'X677667', + '-desc' => 'Sample Bio::Seq object' +); ok defined $seq; -isa_ok $seq,'Bio::PrimarySeqI'; +isa_ok $seq, 'Bio::PrimarySeqI'; is $seq->accession_number(), 'X677667'; -is $seq->seq(), 'TTGGTGGCGTCAACT'; -is $seq->display_id(), 'new-id'; -is $seq->alphabet(), 'dna'; -is $seq->is_circular(), undef; +is $seq->seq(), 'TTGGTGGCGTCAACT'; +is $seq->display_id(), 'new-id'; +is $seq->alphabet(), 'dna'; +is $seq->is_circular(), undef; ok $seq->is_circular(1); is $seq->is_circular(0), 0; # check IdentifiableI and DescribableI interfaces -isa_ok $seq,'Bio::IdentifiableI'; -isa_ok $seq,'Bio::DescribableI'; +isa_ok $seq, 'Bio::IdentifiableI'; +isa_ok $seq, 'Bio::DescribableI'; + # make sure all methods are implemented is $seq->authority("bioperl.org"), "bioperl.org"; -is $seq->namespace("t"), "t"; -is $seq->version(0), 0; -is $seq->lsid_string(), "bioperl.org:t:X677667"; +is $seq->namespace("t"), "t"; +is $seq->version(0), 0; +is $seq->lsid_string(), "bioperl.org:t:X677667"; is $seq->namespace_string(), "t:X677667.0"; -is $seq->description(), 'Sample Bio::Seq object'; -is $seq->display_name(), "new-id"; +is $seq->description(), 'Sample Bio::Seq object'; +is $seq->display_name(), "new-id"; -my $location = Bio::Location::Simple->new('-start' => 2, - '-end' => 5, - '-strand' => -1); -is ($seq->subseq($location), 'ACCA'); +my $location = Bio::Location::Simple->new( + '-start' => 2, + '-end' => 5, + '-strand' => -1 +); +is( $seq->subseq($location), 'ACCA' ); my $splitlocation = Bio::Location::Split->new(); -$splitlocation->add_sub_Location( Bio::Location::Simple->new( - '-start' => 1, - '-end' => 4, - '-strand' => 1)); +$splitlocation->add_sub_Location( + Bio::Location::Simple->new( + '-start' => 1, + '-end' => 4, + '-strand' => 1 + ) +); -$splitlocation->add_sub_Location( Bio::Location::Simple->new( - '-start' => 7, - '-end' => 12, - '-strand' => -1)); +$splitlocation->add_sub_Location( + Bio::Location::Simple->new( + '-start' => 7, + '-end' => 12, + '-strand' => -1 + ) +); -is( $seq->subseq($splitlocation), 'TTGGTGACGC'); +is( $seq->subseq($splitlocation), 'TTGGTGACGC' ); -my $fuzzy = Bio::Location::Fuzzy->new(-start => '<3', - -end => '8', - -strand => 1); +my $fuzzy = Bio::Location::Fuzzy->new( + -start => '<3', + -end => '8', + -strand => 1 +); -is( $seq->subseq($fuzzy), 'GGTGGC'); +is( $seq->subseq($fuzzy), 'GGTGGC' ); -my $trunc = $seq->trunc(1,4); +my $trunc = $seq->trunc( 1, 4 ); isa_ok $trunc, 'Bio::PrimarySeqI'; -is $trunc->seq(), 'TTGG' or diag("Expecting TTGG. Got ".$trunc->seq()); +is $trunc->seq(), 'TTGG' or diag( "Expecting TTGG. Got " . $trunc->seq() ); $trunc = $seq->trunc($splitlocation); -isa_ok($trunc, 'Bio::PrimarySeqI'); -is( $trunc->seq(), 'TTGGTGACGC'); +isa_ok( $trunc, 'Bio::PrimarySeqI' ); +is( $trunc->seq(), 'TTGGTGACGC' ); $trunc = $seq->trunc($fuzzy); -isa_ok($trunc, 'Bio::PrimarySeqI'); -is( $trunc->seq(), 'GGTGGC'); +isa_ok( $trunc, 'Bio::PrimarySeqI' ); +is( $trunc->seq(), 'GGTGGC' ); my $rev = $seq->revcom(); -isa_ok($rev, 'Bio::PrimarySeqI'); +isa_ok( $rev, 'Bio::PrimarySeqI' ); -is $rev->seq(), 'AGTTGACGCCACCAA' or diag('revcom() failed, was ' . $rev->seq()); +is $rev->seq(), 'AGTTGACGCCACCAA' + or diag( 'revcom() failed, was ' . $rev->seq() ); # # Translate # -my $aa = $seq->translate(); # TTG GTG GCG TCA ACT -is $aa->seq, 'LVAST', "Translation: ". $aa->seq; +my $aa = $seq->translate(); # TTG GTG GCG TCA ACT +is $aa->seq, 'LVAST', "Translation: " . $aa->seq; # tests for non-standard initiator codon coding for # M by making translate() look for an initiator codon and # terminator codon ("complete", the 5th argument below) -$seq->seq('TTGGTGGCGTCAACTTAA'); # TTG GTG GCG TCA ACT TAA -$aa = $seq->translate(undef, undef, undef, undef, 1); -is $aa->seq, 'MVAST', "Translation: ". $aa->seq; +$seq->seq('TTGGTGGCGTCAACTTAA'); # TTG GTG GCG TCA ACT TAA +$aa = $seq->translate( undef, undef, undef, undef, 1 ); +is $aa->seq, 'MVAST', "Translation: " . $aa->seq; # same test as previous, but using named parameter -$aa = $seq->translate(-complete => 1); -is $aa->seq, 'MVAST', "Translation: ". $aa->seq; +$aa = $seq->translate( -complete => 1 ); +is $aa->seq, 'MVAST', "Translation: " . $aa->seq; # find ORF, ignore codons outside the ORF or CDS -$seq->seq('TTTTATGGTGGCGTCAACTTAATTT'); # ATG GTG GCG TCA ACT -$aa = $seq->translate(-orf => 1); -is $aa->seq, 'MVAST*', "Translation: ". $aa->seq; +$seq->seq('TTTTATGGTGGCGTCAACTTAATTT'); # ATG GTG GCG TCA ACT +$aa = $seq->translate( -orf => 1 ); +is $aa->seq, 'MVAST*', "Translation: " . $aa->seq; # smallest possible ORF -$seq->seq("ggggggatgtagcccc"); # atg tga -$aa = $seq->translate(-orf => 1); -is $aa->seq, 'M*', "Translation: ". $aa->seq; +$seq->seq("ggggggatgtagcccc"); # atg tga +$aa = $seq->translate( -orf => 1 ); +is $aa->seq, 'M*', "Translation: " . $aa->seq; # same as previous but complete, so * is removed -$aa = $seq->translate(-orf => 1, - -complete => 1); -is $aa->seq, 'M', "Translation: ". $aa->seq; +$aa = $seq->translate( + -orf => 1, + -complete => 1 +); +is $aa->seq, 'M', "Translation: " . $aa->seq; # ORF without termination codon # should warn, let's change it into throw for testing $seq->verbose(2); -$seq->seq("ggggggatgtggcccc"); # atg tgg ccc -eval { $seq->translate(-orf => 1); }; +$seq->seq("ggggggatgtggcccc"); # atg tgg ccc +eval { $seq->translate( -orf => 1 ); }; if ($@) { - like( $@, qr/atgtggcccc\n/); - $seq->verbose(-1); - $aa = $seq->translate(-orf => 1); - is $aa->seq, 'MWP', "Translation: ". $aa->seq; + like( $@, qr/atgtggcccc\n/ ); + $seq->verbose(-1); + $aa = $seq->translate( -orf => 1 ); + is $aa->seq, 'MWP', "Translation: " . $aa->seq; } $seq->verbose(0); # use non-standard codon table where terminator is read as Q -$seq->seq('ATGGTGGCGTCAACTTAG'); # ATG GTG GCG TCA ACT TAG -$aa = $seq->translate(-codontable_id => 6); -is $aa->seq, 'MVASTQ' or diag("Translation: ". $aa->seq); +$seq->seq('ATGGTGGCGTCAACTTAG'); # ATG GTG GCG TCA ACT TAG +$aa = $seq->translate( -codontable_id => 6 ); +is $aa->seq, 'MVASTQ' or diag( "Translation: " . $aa->seq ); # insert an odd character instead of terminating with * -$aa = $seq->translate(-terminator => 'X'); -is $aa->seq, 'MVASTX' or diag("Translation: ". $aa->seq); +$aa = $seq->translate( -terminator => 'X' ); +is $aa->seq, 'MVASTX' or diag( "Translation: " . $aa->seq ); # change frame from default -$aa = $seq->translate(-frame => 1); # TGG TGG CGT CAA CTT AG -is $aa->seq, 'WWRQL' or diag("Translation: ". $aa->seq); +$aa = $seq->translate( -frame => 1 ); # TGG TGG CGT CAA CTT AG +is $aa->seq, 'WWRQL' or diag( "Translation: " . $aa->seq ); -$aa = $seq->translate(-frame => 2); # GGT GGC GTC AAC TTA G -is $aa->seq, 'GGVNL' or diag("Translation: ". $aa->seq); +$aa = $seq->translate( -frame => 2 ); # GGT GGC GTC AAC TTA G +is $aa->seq, 'GGVNL' or diag( "Translation: " . $aa->seq ); # TTG is initiator in Standard codon table? Afraid so. -$seq->seq("ggggggttgtagcccc"); # ttg tag -$aa = $seq->translate(-orf => 1); -is $aa->seq, 'L*' or diag("Translation: ". $aa->seq); +$seq->seq("ggggggttgtagcccc"); # ttg tag +$aa = $seq->translate( -orf => 1 ); +is $aa->seq, 'L*' or diag( "Translation: " . $aa->seq ); -# Replace L at 1st position with M by setting complete to 1 -$seq->seq("ggggggttgtagcccc"); # ttg tag -$aa = $seq->translate(-orf => 1, - -complete => 1); -is $aa->seq, 'M' or diag("Translation: ". $aa->seq); +# Replace L at 1st position with M by setting complete to 1 +$seq->seq("ggggggttgtagcccc"); # ttg tag +$aa = $seq->translate( + -orf => 1, + -complete => 1 +); +is $aa->seq, 'M' or diag( "Translation: " . $aa->seq ); # Ignore non-ATG initiators (e.g. TTG) in codon table -$seq->seq("ggggggttgatgtagcccc"); # atg tag -$aa = $seq->translate(-orf => 1, - -start => "atg", - -complete => 1); -is $aa->seq, 'M' or diag("Translation: ". $aa->seq); +$seq->seq("ggggggttgatgtagcccc"); # atg tag +$aa = $seq->translate( + -orf => 1, + -start => "atg", + -complete => 1 +); +is $aa->seq, 'M' or diag( "Translation: " . $aa->seq ); - # test for character '?' in the sequence string is $seq->seq('TTGGTGGCG?CAACT'), 'TTGGTGGCG?CAACT'; # test for some aliases -$seq = Bio::PrimarySeq->new(-id => 'aliasid', - -description => 'Alias desc'); -is($seq->description, 'Alias desc'); -is($seq->display_id, 'aliasid'); +$seq = Bio::PrimarySeq->new( + -id => 'aliasid', + -description => 'Alias desc' +); +is( $seq->description, 'Alias desc' ); +is( $seq->display_id, 'aliasid' ); # test that x's are ignored and n's are assumed to be 'dna' no longer true! # See Bug 2438. There are protein sequences floating about which are all 'X' # (unknown aa) $seq->seq('atgxxxxxx'); -is($seq->alphabet,'protein'); +is( $seq->alphabet, 'protein' ); @@ Diff output truncated at 10000 characters. @@ From rbuels at dev.open-bio.org Tue Jul 21 18:31:02 2009 From: rbuels at dev.open-bio.org (Robert Buels) Date: Tue, 21 Jul 2009 18:31:02 -0400 Subject: [Bioperl-guts-l] [15881] bioperl-live/trunk/t/Seq/PrimarySeq.t: added from tests for PrimarySeq versions, and some todo tests for copying attributes through revcoms. Message-ID: <200907212231.n6LMV2dL015645@dev.open-bio.org> Revision: 15881 Author: rbuels Date: 2009-07-21 18:31:02 -0400 (Tue, 21 Jul 2009) Log Message: ----------- added from tests for PrimarySeq versions, and some todo tests for copying attributes through revcoms. just done in passing, since i was over here anyway looking at the version() stuff in primaryseq Modified Paths: -------------- bioperl-live/trunk/t/Seq/PrimarySeq.t Modified: bioperl-live/trunk/t/Seq/PrimarySeq.t =================================================================== --- bioperl-live/trunk/t/Seq/PrimarySeq.t 2009-07-21 22:19:04 UTC (rev 15880) +++ bioperl-live/trunk/t/Seq/PrimarySeq.t 2009-07-21 22:31:02 UTC (rev 15881) @@ -7,7 +7,7 @@ use lib '.'; use Bio::Root::Test; - test_begin( -tests => 54 ); + test_begin( -tests => 62 ); use_ok('Bio::PrimarySeq'); use_ok('Bio::Location::Simple'); @@ -39,9 +39,13 @@ # make sure all methods are implemented is $seq->authority("bioperl.org"), "bioperl.org"; is $seq->namespace("t"), "t"; -is $seq->version(0), 0; +is $seq->namespace, "t"; +is $seq->version(0), 0; is $seq->lsid_string(), "bioperl.org:t:X677667"; is $seq->namespace_string(), "t:X677667.0"; +$seq->version(47); +is $seq->version, 47; +is $seq->namespace_string(), "t:X677667.47"; is $seq->description(), 'Sample Bio::Seq object'; is $seq->display_name(), "new-id"; @@ -97,6 +101,17 @@ is $rev->seq(), 'AGTTGACGCCACCAA' or diag( 'revcom() failed, was ' . $rev->seq() ); +is $rev->display_id, 'new-id'; +is( $rev->alphabet(), 'dna', 'alphabet copied through revcom' ); +TODO: { + local $TODO = + 'all attributes of primaryseqs are not currently copied through revcoms'; + is( $rev->namespace, 't', 'namespace copied through revcom' ); + is( $rev->namespace_string(), + "t:X677667.47", 'namespace_string copied through revcom' ); + is( $rev->is_circular(), 0, 'is_circular copied through revcom' ); +} + # # Translate # From maj at dev.open-bio.org Wed Jul 22 00:53:18 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 22 Jul 2009 00:53:18 -0400 Subject: [Bioperl-guts-l] [15882] bioperl-dev/branches/maj-nexml/Bio/Nexml.pm: A style review Message-ID: <200907220453.n6M4rInq018547@dev.open-bio.org> Revision: 15882 Author: maj Date: 2009-07-22 00:53:18 -0400 (Wed, 22 Jul 2009) Log Message: ----------- A style review Modified Paths: -------------- bioperl-dev/branches/maj-nexml/Bio/Nexml.pm Property Changed: ---------------- bioperl-dev/branches/maj-nexml/Bio/Nexml.pm Modified: bioperl-dev/branches/maj-nexml/Bio/Nexml.pm =================================================================== --- bioperl-dev/branches/maj-nexml/Bio/Nexml.pm 2009-07-21 22:31:02 UTC (rev 15881) +++ bioperl-dev/branches/maj-nexml/Bio/Nexml.pm 2009-07-22 04:53:18 UTC (rev 15882) @@ -73,11 +73,14 @@ http://bugzilla.open-bio.org/ -=head1 AUTHOR - Chase Miller, Mark A. Jensen +=head1 AUTHOR - Chase Miller Email chmille4 at gmail.com - maj at fortinbras.us +=head1 CONTRIBUTORS + +Mark A. Jensen, maj -at- fortinbras -dot- com + =head1 APPENDIX The rest of the documentation details each of the object @@ -92,11 +95,13 @@ use strict; #TODO Change this use lib '..'; -use Bio::SeqIO::Nexml; -use Bio::AlignIO::Nexml; -use Bio::TreeIO::Nexml; + +use Bio::SeqIO::nexml; +use Bio::AlignIO::nexml; +use Bio::TreeIO::nexml; +use Bio::Nexml::Util; + use Bio::Phylo::IO; -use Bio::Nexml::Util; use Bio::Phylo::Factory; use Bio::Phylo::Matrices; @@ -144,21 +149,29 @@ sub _parse { my ($self) = @_; - $self->{'_parsed'} = 1; - $self->{'_treeiter'} = 0; - $self->{'_seqiter'} = 0; - $self->{'_alniter'} = 0; +# $self->{'_treeiter'} = 0; +# $self->{'_seqiter'} = 0; +# $self->{'_alniter'} = 0; + + # don't forget that $self is just a hashref, so you can do + @{$self}{qw( _treeiter _seqiter _alniter)} = (0,0,0); + # with the ever-popular "hash slice" (one of my faves) $self->{'_trees'} = Bio::Nexml::Util->_make_tree($self->doc); $self->{'_alns'} = Bio::Nexml::Util->_make_aln($self->doc); $self->{'_seqs'} = Bio::Nexml::Util->_make_seq($self->doc); + + $self->{'_parsed'} = 1; # success if you got here } sub next_tree { my $self = shift; - unless ( $self->{'_parsed'} ) { - $self->_parse; - } +# unless ( $self->{'_parsed'} ) { +# $self->_parse; +# } + # a 'pro' idiom for this is: + $self->_parse unless $self->{'_parsed'}; + return $self->{'_trees'}->[ $self->{'_treeiter'}++ ]; } @@ -178,22 +191,41 @@ return $self->{'_alns'}->[ $self->{'_alniter'}++ ]; } +### here's a rewind idea: +sub rewind { + my $self = shift; + my $elt = shift; + $self->{"_${elt}iter"} = 0 if defined $self->{"_${elt}iter"}; + return 1; +} + +sub rewind_seq { shift->rewind('seq'); } +sub rewind_aln { shift->rewind('aln'); } +sub rewind_tree { shift->rewind('tree'); } + +# you could do something similar with the next_* functions too. Slick. + +### + sub write_doc { my ($self, @args) = @_; my %params = @args; - my $trees = $params{'-trees'}; - my $alns = $params{'-alns'}; - my $seqs = $params{'-seqs'}; - +# my $trees = $params{'-trees'}; +# my $alns = $params{'-alns'}; +# my $seqs = $params{'-seqs'}; + + # and the other direction: + my ($trees, $alns, $seqs) = @params{qw( -trees -alns -seqs )}; + my $proj_doc = Bio::Phylo::Factory->create_project(); #convert trees to bio::Phylo objects my $forest = Bio::Phylo::Factory->create_forest(); my @forests; - my @taxas; + my @taxas; # remember that taxa is already plural (of taxon)/maj my $ent; my $taxa_o; my $phylo_tree_o; @@ -201,7 +233,17 @@ foreach my $tree (@$trees) { ($phylo_tree_o, $taxa_o) = Bio::Nexml::Util->create_bphylo_tree($tree); - link_taxa($self, $taxa_o, $forest, \@taxas); +# link_taxa($self, $taxa_o, $forest, \@taxas); + # why not + $self->link_taxa($taxa_o, $forest, \@taxas); + + # what is the \@taxas argument for? It isn't set. + # Maybe you don't need it here--then you can just say + +# $self->link_taxa($taxa_o, $forest); + # and the missing argument will just be undef in the + # method -- this saves some unnecessary declarations + # and cruft $forest->insert($phylo_tree_o); } @@ -215,7 +257,12 @@ ($phylo_matrix_o, $taxa_o) = Bio::Nexml::Util->create_bphylo_aln($aln); #link_taxa and check for already existing identical taxa - link_taxa($self, $taxa_o, $phylo_matrix_o, \@matrix_taxas); +# link_taxa($self, $taxa_o, $phylo_matrix_o, \@matrix_taxas); + $self->link_taxa($taxa_o, $phylo_matrix_o, \@matrix_taxas); + # is \@matrix_taxas set?? do you want +# $self->link_taxa($taxa_o, $phylo_matrix_o); + # for this call? (see above comments) + $matrices->insert($phylo_matrix_o); } @@ -238,15 +285,22 @@ $self->_print($proj_doc->to_xml()); } + +# this is hairy--probably can use some tricks to clean it up a bit./maj + sub link_taxa { - + my ($self, $taxa_o, $phylo_cont_o, $taxas) = @_; my $duplicate_taxa; my $new_taxa_ents = $taxa_o->get_entities(); #test if taxa_o is already present + + # how about pushing this loop into a subroutine that + # returns $duplicate_taxa, to clean up the code a bit? + #### foreach my $taxa (@$taxas) { my $taxa_ents = $taxa->get_entities; @@ -264,6 +318,8 @@ last; } } + #### + if (!$duplicate_taxa) { push @$taxas, $taxa_o; $phylo_cont_o->set_taxa($taxa_o); @@ -275,22 +331,28 @@ } my $present_taxa_ents = $duplicate_taxa->get_entities(); - my %present_taxa_ents = map {($_)->get_name, $_} @$present_taxa_ents; + # '=>' means exactly the same as ',' but it makes it clearer + # that you're producing a hash.../maj + my %present_taxa_ents = map {($_)->get_name => $_} @$present_taxa_ents; foreach my $new_taxa_ent (@$new_taxa_ents) { my $new_label = $new_taxa_ent->get_name(); #If tree get nodes and change taxa to point to already present ($duplicated_taxa) taxa - if($phylo_cont_o->isa('Bio::Phylo::Forest')) { + + # rearranging with a / /&&do{}; switch structure/maj + for (ref $phylo_cont_o) { + /Bio::Phylo::Forest/ && do { my $nodes = $new_taxa_ent->get_nodes(); foreach my $node (@$nodes) { $new_taxa_ent->unset_node($node); $present_taxa_ents{$new_label}->set_nodes($node); } - } + last; + }; #If matrix get data and change the taxa to point to already present ($duplicated_taxa) taxa - elsif($phylo_cont_o->isa('Bio::Phylo::Matrices::Matrix')) { + /Bio::Phylo::Matrices::Matrix/ && do { my $data = $new_taxa_ent->get_data(); foreach my $datum (@$data) { @@ -298,14 +360,19 @@ $present_taxa_ents{$new_label}->set_data($datum); } $phylo_cont_o->set_taxa($duplicate_taxa); - } - else { + last; + }; + do { # else $self->throw("Object container must be either Forest or Matrix"); + }; } - my $xml_id = $present_taxa_ents{$new_label}; - if( !$xml_id) { - $self->throw("taxa conversion error - taxa not identical"); - } +# my $xml_id = $present_taxa_ents{$new_label}; +# if( !$xml_id) { +# $self->throw("taxa conversion error - taxa not identical"); +# } + # more condensation/maj + $self->throw("taxa conversion error - taxa not identical") unless $present_taxa_ents{$new_label}; + } } } Property changes on: bioperl-dev/branches/maj-nexml/Bio/Nexml.pm ___________________________________________________________________ Name: svn:keywords - Id Date Author Rev + Id Rev Author Date From maj at dev.open-bio.org Wed Jul 22 22:06:22 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 22 Jul 2009 22:06:22 -0400 Subject: [Bioperl-guts-l] [15883] bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm: code review-- Message-ID: <200907230206.n6N26MiF022699@dev.open-bio.org> Revision: 15883 Author: maj Date: 2009-07-22 22:06:21 -0400 (Wed, 22 Jul 2009) Log Message: ----------- code review-- design comments for Chase Modified Paths: -------------- bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm Modified: bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm =================================================================== --- bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm 2009-07-22 04:53:18 UTC (rev 15882) +++ bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm 2009-07-23 02:06:21 UTC (rev 15883) @@ -1,6 +1,6 @@ # $Id$ # -# BioPerl module for Bio::TreeIO::nexml +# BioPerl module for Bio::Nexml::Util # # Please direct questions and support issues to # @@ -80,8 +80,28 @@ use Bio::Phylo::Matrices::Datatype::Rna; #not sure that it needs to inerhit from Bio::Nexml -use base qw(Bio::Nexml); +# no reason to really; you don't make instances of this class, +# and there doesn't seem to be any calling of Bio::Nexml methods +# here. Better to avoid the inheritance if not strictly necessary- +# /maj + +#use base qw(Bio::Nexml); + +# It looks to me like you've created a factory without realizing it +# (or maybe you did realize it!): +# if you *do* add a constructor here, then up in Nexml.pm, you +# can initialize a Util object, like + +# $bn_fac = Bio::Nexml::Util->new() + +# and instead of Bio::Nexml::Util->create_..., you can call off the instance +# $bn_fac->create_... + +# this looks like only a preference at the moment, BUT I think I'll have more +# to say on the subject..../maj + + my $fac = Bio::Phylo::Factory->new(); From maj at dev.open-bio.org Wed Jul 22 23:35:59 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 22 Jul 2009 23:35:59 -0400 Subject: [Bioperl-guts-l] [15884] bioperl-live/trunk/Bio/SimpleAlign.pm: Sprucing the shaggy pod in the FeatureHolderI Message-ID: <200907230335.n6N3ZxLk025402@dev.open-bio.org> Revision: 15884 Author: maj Date: 2009-07-22 23:35:59 -0400 (Wed, 22 Jul 2009) Log Message: ----------- Sprucing the shaggy pod in the FeatureHolderI implementation. Also, added that filter HL was dreaming of (get_SeqFeatures). Modified Paths: -------------- bioperl-live/trunk/Bio/SimpleAlign.pm Modified: bioperl-live/trunk/Bio/SimpleAlign.pm =================================================================== --- bioperl-live/trunk/Bio/SimpleAlign.pm 2009-07-23 02:06:21 UTC (rev 15883) +++ bioperl-live/trunk/Bio/SimpleAlign.pm 2009-07-23 03:35:59 UTC (rev 15884) @@ -2966,48 +2966,46 @@ } -=head2 methods for Bio::FeatureHolder +=head2 methods implementing Bio::FeatureHolderI -FeatureHolder implementation to support labeled character sets like one +FeatureHolderI implementation to support labeled character sets like one would get from NEXUS represented data. =head2 get_SeqFeatures - Usage : + Usage : @features = $aln->get_SeqFeatures Function: Get the feature objects held by this feature holder. Example : Returns : an array of Bio::SeqFeatureI implementing objects - Args : none + Args : optional filter coderef, taking a Bio::SeqFeatureI + : as argument, returning TRUE if wanted, FALSE if + : unwanted -At some day we may want to expand this method to allow for a feature -filter to be passed in. - =cut sub get_SeqFeatures { my $self = shift; - + my $filter_cb = shift; + $self->throw("Arg (filter callback) must be a coderef") unless + !defined($filter_cb) or ref($filter_cb) eq 'CODE'; if( !defined $self->{'_as_feat'} ) { $self->{'_as_feat'} = []; } + if ($filter_cb) { + return grep { $filter_cb->($_) } @{$self->{'_as_feat'}}; + } return @{$self->{'_as_feat'}}; } =head2 add_SeqFeature - Usage : $feat->add_SeqFeature($subfeat); - $feat->add_SeqFeature($subfeat,'EXPAND') - Function: adds a SeqFeature into the subSeqFeature array. - with no 'EXPAND' qualifer, subfeat will be tested - as to whether it lies inside the parent, and throw - an exception if not. - - If EXPAND is used, the parent''s start/end/strand will - be adjusted so that it grows to accommodate the new - subFeature + Usage : $aln->add_SeqFeature($subfeat); + Function: adds a SeqFeature into the SeqFeature array. Example : - Returns : nothing + Returns : true on success Args : a Bio::SeqFeatureI object + Note : This implementation is not compliant + with Bio::FeatureHolderI =cut @@ -3030,10 +3028,10 @@ =head2 remove_SeqFeatures Usage : $obj->remove_SeqFeatures - Function: Removes all sub SeqFeatures. If you want to remove only a subset, + Function: Removes all SeqFeatures. If you want to remove only a subset, remove that subset from the returned array, and add back the rest. - Returns : The array of Bio::SeqFeatureI implementing sub-features that was - deleted from this feature. + Returns : The array of Bio::SeqFeatureI features that was + deleted from this alignment. Args : none =cut @@ -3051,25 +3049,10 @@ Title : feature_count Usage : $obj->feature_count() - Function: Return the number of SeqFeatures attached to a feature holder. - - This is before flattening a possible sub-feature tree. - - We provide a default implementation here that just counts - the number of objects returned by get_SeqFeatures(). - Implementors may want to override this with a more - efficient implementation. - + Function: Return the number of SeqFeatures attached to the alignment Returns : integer representing the number of SeqFeatures Args : None -At some day we may want to expand this method to allow for a feature -filter to be passed in. - -Our default implementation allows for any number of additional -arguments and will pass them on to get_SeqFeatures(). I.e., in order to -support filter arguments, just support them in get_SeqFeatures(). - =cut sub feature_count { @@ -3085,27 +3068,13 @@ =head2 get_all_SeqFeatures Title : get_all_SeqFeatures - Usage : - Function: Get the flattened tree of feature objects held by this - feature holder. The difference to get_SeqFeatures is that - the entire tree of sub-features will be flattened out. - - We provide a default implementation here, so implementors - don''t necessarily need to implement this method. - + Usage : + Function: Get all SeqFeatures. Example : Returns : an array of Bio::SeqFeatureI implementing objects Args : none + Note : Falls through to Bio::FeatureHolderI implementation. -At some day we may want to expand this method to allow for a feature -filter to be passed in. - -Our default implementation allows for any number of additional -arguments and will pass them on to any invocation of -get_SeqFeatures(), wherever a component of the tree implements -FeatureHolderI. I.e., in order to support filter arguments, just -support them in get_SeqFeatures(). - =cut =head2 methods for Bio::AnnotatableI From maj at dev.open-bio.org Wed Jul 22 23:45:50 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Wed, 22 Jul 2009 23:45:50 -0400 Subject: [Bioperl-guts-l] [15885] bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm: more comments Message-ID: <200907230345.n6N3jo1S025716@dev.open-bio.org> Revision: 15885 Author: maj Date: 2009-07-22 23:45:50 -0400 (Wed, 22 Jul 2009) Log Message: ----------- more comments Modified Paths: -------------- bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm Modified: bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm =================================================================== --- bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm 2009-07-23 03:35:59 UTC (rev 15884) +++ bioperl-dev/branches/maj-nexml/Bio/Nexml/Util.pm 2009-07-23 03:45:50 UTC (rev 15885) @@ -28,6 +28,9 @@ This is a utility module in the nexml namespace. It contains methods that are needed by multiple modules. +A few key design issues pertaining to this module will be +described here. + =head1 FEEDBACK =head2 Mailing Lists @@ -104,6 +107,7 @@ my $fac = Bio::Phylo::Factory->new(); +# PODPODPOD sub _make_aln { my ($self, $proj) = @_; @@ -121,6 +125,10 @@ unless ($mol_type eq 'dna' || $mol_type eq 'rna' || $mol_type eq 'protein') { next; + # something for the back-burner: BioPerl has objects + # to handle arbitrary genotypes; might be cool to + # be able to create something besides alignments + # here .../maj } my $basename = $matrix->get_name(); @@ -142,10 +150,12 @@ $seq = Bio::LocatableSeq->new( -seq => $newSeq, - -display_id => "$seqID", +# -display_id => "$seqID", + -display_id => "$rowlabel", #-description => $desc, -alphabet => $mol_type, ); + my $feat; #check if taxon linked to sequence if so create feature to attach to alignment foreach my $taxa_o (@$taxa) @@ -174,6 +184,8 @@ return \@alns; } +#PODPODPOD + sub _make_tree { my($self, $proj) = @_; my @trees; @@ -259,6 +271,8 @@ return \@trees; } +#PODPODPOD + sub _make_seq { my($self, $proj) = @_; my $matrices = $proj->get_matrices(); @@ -326,6 +340,8 @@ return \@seqs; } +#PODPODPOD + sub create_bphylo_tree { my ($self, $bptree) = @_; #most of the code below ripped form Bio::Phylo::Forest::Tree::new_from_bioperl()d @@ -353,8 +369,8 @@ return $tree, $taxa; } +#PODPODPOD - sub _copy_tree { my ( $tree, $bpnode, $parent, $taxa ) = @_; my $node = Bio::Phylo::Forest::Node->new_from_bioperl($bpnode); @@ -376,6 +392,8 @@ return $tree, $taxa; } +#PODPODPOD + sub create_bphylo_aln { my ($self, $aln, @args) = @_; @@ -423,6 +441,8 @@ } } +#PODPODPOD + sub create_bphylo_seq { my ($self, $seq, @args) = @_; my $type = $seq->alphabet || $seq->_guess_alphabet || 'dna'; @@ -438,6 +458,9 @@ if ( $seqstring and $seqstring =~ /\S/ ) { eval { $dat->set_char( $seqstring ) }; #TODO Test debuggin + + # let's convert Rutger's cool exceptions to the more pedestrian Bioperl throws/maj + if ( $@ and UNIVERSAL::isa($@,'Bio::Phylo::Util::Exceptions::InvalidData') ) { $self->throw( "\nAn exception of type Bio::Phylo::Util::Exceptions::InvalidData was caught\n\n". @@ -471,6 +494,8 @@ return $matrix, $taxa; } +#PODPODPOD (there's a leitmotif here...) + sub create_bphylo_taxa { my ($aln, $seq) = @_; @@ -480,6 +505,8 @@ } +#PODPODPOD + sub create_bphylo_datum { #ripped from Bio::Phylo::Matrices::Datum::new_from_bioperl() my ( $seq, $feats, $taxa, @args ) = @_; @@ -493,7 +520,9 @@ my $seqstring = $seq->seq; if ( $seqstring and $seqstring =~ /\S/ ) { eval { $self->set_char( $seqstring ) }; - + + # let's convert Rutger's cool exceptions to the more pedestrian Bioperl throws/maj + if ( $@ and UNIVERSAL::isa($@,'Bio::Phylo::Util::Exceptions::InvalidData') ) { $self->throw( "\nAn exception of type Bio::Phylo::Util::Exceptions::InvalidData was caught\n\n". From bugzilla-daemon at portal.open-bio.org Thu Jul 23 01:45:16 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 01:45:16 -0400 Subject: [Bioperl-guts-l] [Bug 2884] New: the coverage parsed by bioperl from blast result is great than 1 Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2884 Summary: the coverage parsed by bioperl from blast result is great than 1 Product: BioPerl Version: 1.6 branch Platform: Sun OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Bio::Search/Bio::SearchIO AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: jinjp at mail.cbi.pku.edu.cn when using bioperl to extract blast results, I found there are some results whose coverage great than 1 ( attached an example). when I looked the code of Bio/Search/SearchUtils.pm, I found that there are several errors. In line 439-514, when we mergered the contigs there maybe appear new contigs that can be mergered. if this occurs when calling "_adjust_contigs" for the last HSP, we maybe ignore mergering some new contigs. So we should merger them again and again until there are no new contigs appearing. there is another problem. you took the strand and frame into consideration when calculating the $qctg_dat (line 254-257), while when merger contigs, you didn't consider them. That will result in some mistakes when calculating the coverage sometimes. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 01:48:56 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 01:48:56 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907230548.n6N5mu61022064@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #1 from cjfields at bioperl.org 2009-07-23 01:48 EST ------- This may have been rendered obsolete due to Mark Jensen's refactoring of HSP tiling: http://www.bioperl.org/wiki/HOWTO:Tiling Mark, want to comment? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 02:03:39 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 02:03:39 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907230603.n6N63dqX022569@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #2 from jinjp at mail.cbi.pku.edu.cn 2009-07-23 02:03 EST ------- Created an attachment (id=1347) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1347&action=view) BLAST output ,parsing Perl code, and the erroneous output this attachment includes three files. blast.out is the raw blast result file. extract_blast_result.pl is the code to parse blast.out. blast_extract is the result parsed by extract_blast_result.pl, and in which we can see the error(coverage >1) -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 02:26:37 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 02:26:37 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907230626.n6N6Qbxx023377@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #3 from jinjp at mail.cbi.pku.edu.cn 2009-07-23 02:26 EST ------- (In reply to comment #1) > This may have been rendered obsolete due to Mark Jensen's refactoring of HSP > tiling: > http://www.bioperl.org/wiki/HOWTO:Tiling > Mark, want to comment? when using blastn, it does solved using the lastest version of bioperl. but when using tblastn, there are remain some coverages great than 1(see attactment). -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 14:02:46 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 14:02:46 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907231802.n6NI2k1V018945@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #4 from maj at fortinbras.us 2009-07-23 14:02 EST ------- I'm on the case; I'll get to something substantial soon today. My first guess here is that something is working in SearchUtils because I patched it before going all the way with MapTiling. I highly recommend checking the HOWTO:Tiling to see how the original script can be converted from the SearchUtils-based system. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 16:43:06 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 16:43:06 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907232043.n6NKh6Oo028722@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #5 from maj at fortinbras.us 2009-07-23 16:43 EST ------- Created an attachment (id=1348) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1348&action=view) unRARed script of jin plain text version of example script -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 17:08:09 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 17:08:09 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907232108.n6NL896H029557@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #6 from maj at fortinbras.us 2009-07-23 17:08 EST ------- Created an attachment (id=1349) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1349&action=view) example script converted to use Bio::Search::Tiling::MapTiling I modified the script to use MapTiling in the correct places, and it seems to work fine. Please test with the latest BioPerl. Thanks MAJ -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 17:17:44 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 17:17:44 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907232117.n6NLHitD030000@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #7 from cjfields at bioperl.org 2009-07-23 17:17 EST ------- So, do we want to integrate this into Bio::Search (and deprecate using the older tiling methods in SearchUtils)? Or should we leave it separate altogether? My vote is on the latter; not everyone needs tiling analysis. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Jul 23 17:32:19 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 23 Jul 2009 17:32:19 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907232132.n6NLWJhI030372@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #8 from maj at fortinbras.us 2009-07-23 17:32 EST ------- (In reply to comment #7) > So, do we want to integrate this into Bio::Search (and deprecate using the > older tiling methods in SearchUtils)? Or should we leave it separate > altogether? > > My vote is on the latter; not everyone needs tiling analysis. > I think the separateness of it is a virtue, definitely. I think SearchUtils should be deprecated; it's very crusty, and not even Jason trusts it. MapTiling has had a pretty fair run, 3000 hits on the HOWTO and no bugs yet or negative comments, so may be a good time. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From fangly at dev.open-bio.org Thu Jul 23 22:27:57 2009 From: fangly at dev.open-bio.org (Florent E Angly) Date: Thu, 23 Jul 2009 22:27:57 -0400 Subject: [Bioperl-guts-l] [15886] bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm: Slight modification and explanation of the Bio::Tools::Run::TigrAssembler <39bp issue Message-ID: <200907240227.n6O2Rv0V028991@dev.open-bio.org> Revision: 15886 Author: fangly Date: 2009-07-23 22:27:56 -0400 (Thu, 23 Jul 2009) Log Message: ----------- Slight modification and explanation of the Bio::Tools::Run::TigrAssembler <39bp issue Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm Modified: bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm 2009-07-23 03:45:50 UTC (rev 15885) +++ bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm 2009-07-24 02:27:56 UTC (rev 15886) @@ -24,6 +24,10 @@ # do something with assembled sequences } + # Look at what command was run and what the intermediary files were using: + $assembler->verbose(2); + $assembler->save_tempfiles(1); + =head1 DESCRIPTION Wrapper module for the local execution of the DNA assembly program TIGR @@ -36,7 +40,7 @@ The description enables to runs TIGR Assembler by feeding it sequence objects and returning assembly objects. The input could be an array of Bio::PrimarySeq or maybe Bio::Seq::Quality, in which case, the quality scores will - automatically be used during assembly. Sequences less than 40 bp long are + automatically be used during assembly. Sequences less than 39 bp long are filtered out since they are not supported by TIGR Assembler. The amount of memory in your machine may prevent you to assemble large sequence datasets, but this module offers a way to split your dataset in smaller @@ -287,7 +291,7 @@ $last = $tot_nof_seqs-1 if $last > $tot_nof_seqs-1; my @seq_subset = @$seqs[$first..$last]; my @qual_subset = @$quals[$first..$last] if $quals; - # Write temp FASTA and QUAL input files, removing sequences less than 40bp + # Write temp FASTA and QUAL input files, removing sequences less than 39bp my ($fasta_file, $qual_file) = $self->_write_seq_file(\@seq_subset, \@qual_subset); # Assemble if (defined $fasta_file) { @@ -329,8 +333,10 @@ $self->warn("A sequence had no ID. Its ID is now $newid"); } my $seqid = $seq->id; - # Remove sequences less than 40bp (not supported by TIGR_Assembler) - my $min_length = 40; + # Remove sequences less than 39bp because they make TIGR_Assembler crash. + # To reproduce this bug, take 2 identical sequences, trim one below 39bp, + # run TIGR_Assembler with its default parameters, and watch the backtrace + my $min_length = 39; if ($seq->length < $min_length) { splice @$seqs, $i, 1; $i--; From cjfields at illinois.edu Thu Jul 23 22:57:23 2009 From: cjfields at illinois.edu (Chris Fields) Date: Thu, 23 Jul 2009 21:57:23 -0500 Subject: [Bioperl-guts-l] [15886] bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm: Slight modification and explanation of the Bio::Tools::Run::TigrAssembler <39bp issue In-Reply-To: <200907240227.n6O2Rv0V028991@dev.open-bio.org> References: <200907240227.n6O2Rv0V028991@dev.open-bio.org> Message-ID: <012E92F5-6DFE-45CF-9887-F1D595231DC5@illinois.edu> Yikes! Really? -c On Jul 23, 2009, at 9:27 PM, Florent E Angly wrote: > Revision: 15886 > Author: fangly > Date: 2009-07-23 22:27:56 -0400 (Thu, 23 Jul 2009) > > Log Message: > ----------- > Slight modification and explanation of the > Bio::Tools::Run::TigrAssembler <39bp issue > > Modified Paths: > -------------- > bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm > > Modified: bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm > =================================================================== > --- bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm 2009-07-23 > 03:45:50 UTC (rev 15885) > +++ bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm 2009-07-24 > 02:27:56 UTC (rev 15886) > @@ -24,6 +24,10 @@ > # do something with assembled sequences > } > > + # Look at what command was run and what the intermediary files > were using: > + $assembler->verbose(2); > + $assembler->save_tempfiles(1); > + > =head1 DESCRIPTION > > Wrapper module for the local execution of the DNA assembly program > TIGR > @@ -36,7 +40,7 @@ > The description enables to runs TIGR Assembler by feeding it > sequence objects > and returning assembly objects. The input could be an array of > Bio::PrimarySeq > or maybe Bio::Seq::Quality, in which case, the quality scores will > - automatically be used during assembly. Sequences less than 40 bp > long are > + automatically be used during assembly. Sequences less than 39 bp > long are > filtered out since they are not supported by TIGR Assembler. The > amount of memory in your machine may prevent you to assemble large > sequence > datasets, but this module offers a way to split your dataset in > smaller > @@ -287,7 +291,7 @@ > $last = $tot_nof_seqs-1 if $last > $tot_nof_seqs-1; > my @seq_subset = @$seqs[$first..$last]; > my @qual_subset = @$quals[$first..$last] if $quals; > - # Write temp FASTA and QUAL input files, removing sequences > less than 40bp > + # Write temp FASTA and QUAL input files, removing sequences > less than 39bp > my ($fasta_file, $qual_file) = $self- > >_write_seq_file(\@seq_subset, \@qual_subset); > # Assemble > if (defined $fasta_file) { > @@ -329,8 +333,10 @@ > $self->warn("A sequence had no ID. Its ID is now $newid"); > } > my $seqid = $seq->id; > - # Remove sequences less than 40bp (not supported by > TIGR_Assembler) > - my $min_length = 40; > + # Remove sequences less than 39bp because they make > TIGR_Assembler crash. > + # To reproduce this bug, take 2 identical sequences, trim one > below 39bp, > + # run TIGR_Assembler with its default parameters, and watch the > backtrace > + my $min_length = 39; > if ($seq->length < $min_length) { > splice @$seqs, $i, 1; > $i--; > > > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l From bugzilla-daemon at portal.open-bio.org Fri Jul 24 04:51:54 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 24 Jul 2009 04:51:54 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907240851.n6O8psgG022545@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #9 from jinjp at mail.cbi.pku.edu.cn 2009-07-24 04:51 EST ------- It's OK, Thanks. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From fangly at dev.open-bio.org Fri Jul 24 15:25:34 2009 From: fangly at dev.open-bio.org (Florent E Angly) Date: Fri, 24 Jul 2009 15:25:34 -0400 Subject: [Bioperl-guts-l] [15887] bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm: Added some information in the Bio::Tools::Run::TigrAssembler documentation Message-ID: <200907241925.n6OJPYRP030957@dev.open-bio.org> Revision: 15887 Author: fangly Date: 2009-07-24 15:25:33 -0400 (Fri, 24 Jul 2009) Log Message: ----------- Added some information in the Bio::Tools::Run::TigrAssembler documentation Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm Modified: bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm 2009-07-24 02:27:56 UTC (rev 15886) +++ bioperl-run/trunk/Bio/Tools/Run/TigrAssembler.pm 2009-07-24 19:25:33 UTC (rev 15887) @@ -31,12 +31,9 @@ =head1 DESCRIPTION Wrapper module for the local execution of the DNA assembly program TIGR - Assembler v2.0. TIGR. + Assembler v2.0. TIGR Assembler is open source software under The Artistic + License and available at: http://www.tigr.org/software/assembler/ - Assembler is open source software under The Artistic License and available at: - - http://www.tigr.org/software/assembler/ - The description enables to runs TIGR Assembler by feeding it sequence objects and returning assembly objects. The input could be an array of Bio::PrimarySeq or maybe Bio::Seq::Quality, in which case, the quality scores will @@ -197,7 +194,8 @@ that the ends of sequences are lower quality and doubled base calls are the most frequent sequencing error. minimum_length: the minimum length two DNA fragments must overlap to be - considered as a possible assembly. + considered as a possible assembly (warning: in tests I did, this option + did not work as expected...) include_singlets: a flag which indicates that singletons (assemblies made up of a single DNA fragment) should be included in the lassie output_file - the default is to not include singletons. From bugzilla-daemon at portal.open-bio.org Sat Jul 25 04:11:49 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 25 Jul 2009 04:11:49 -0400 Subject: [Bioperl-guts-l] [Bug 2885] New: bug with searchio Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2885 Summary: bug with searchio Product: BioPerl Version: unspecified Platform: Other OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Bio::Search/Bio::SearchIO AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: gilgi.fri at gmail.com Hello, I am using searchio quite a lot, and it usually very useful. However I now have a blast output that gives me the following message: substr outside of string at /usr/share/perl5/Bio/SearchIO/blast.pm line 1815, line 2495. substr outside of string at /usr/share/perl5/Bio/SearchIO/blast.pm line 1815, line 2496. ------------- EXCEPTION: Bio::Root::Exception ------------- MSG: no data for midline Posted date: Jul 23, 2009 10:30 AM STACK: Error::throw STACK: Bio::Root::Root::throw /usr/share/perl5/Bio/Root/Root.pm:359 STACK: Bio::SearchIO::blast::next_result /usr/share/perl5/Bio/SearchIO/blast.pm:1813 STACK: ./parse-genomic-virus-blast.pl:30 ----------------------------------------------------------- Can you help please? Thanks, Gilgi -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 26 00:58:36 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 26 Jul 2009 00:58:36 -0400 Subject: [Bioperl-guts-l] [Bug 2885] bug with searchio In-Reply-To: Message-ID: <200907260458.n6Q4waOb016780@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2885 gilgi.fri at gmail.com changed: What |Removed |Added ---------------------------------------------------------------------------- AssignedTo|bioperl-guts-l at bioperl.org |gilgi.fri at gmail.com Status|NEW |ASSIGNED ------- Comment #1 from gilgi.fri at gmail.com 2009-07-26 00:58 EST ------- Created an attachment (id=1350) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1350&action=view) The blast output that gives the error message The attached file is the blast output that fives the eroor message when trying to parse it with the searchio module -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Jul 26 02:21:30 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 26 Jul 2009 02:21:30 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907260621.n6Q6LUwV018913@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #10 from jinjp at mail.cbi.pku.edu.cn 2009-07-26 02:21 EST ------- Hi,Mark, there maybe a small mistake in MapTiling.pm(line 183)-using an array @hsps without values. following is the code after modified about line 179 to 191: _______________________________________________________________ my @hsps=$hit->hsps; # apply filter function if requested if ( defined $filter ) { if ( ref($filter) eq 'CODE' ) { @hsps = map { $filter->($_) ? $_ : () } @hsps; } else { $self->warn("-filter is not a coderef; ignoring"); } } # else { # @hsps = $hit->hsps; # } _________________________________________________________________ Thanks -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Jul 27 08:31:31 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 27 Jul 2009 08:31:31 -0400 Subject: [Bioperl-guts-l] [Bug 2884] the coverage parsed by bioperl from blast result is great than 1 In-Reply-To: Message-ID: <200907271231.n6RCVVUi032257@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2884 ------- Comment #11 from maj at fortinbras.us 2009-07-27 08:31 EST ------- Hi Jin-- You're right-- I'll make that change! cheers MAJ (In reply to comment #10) > Hi,Mark, there maybe a small mistake in MapTiling.pm(line 183)-using an array > @hsps without values. > following is the code after modified about line 179 to 191: > _______________________________________________________________ > my @hsps=$hit->hsps; > # apply filter function if requested > if ( defined $filter ) { > if ( ref($filter) eq 'CODE' ) { > @hsps = map { $filter->($_) ? $_ : () } @hsps; > } > else { > $self->warn("-filter is not a coderef; ignoring"); > } > } > # else { > # @hsps = $hit->hsps; > # } > > _________________________________________________________________ > Thanks > -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Jul 27 08:52:39 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 27 Jul 2009 08:52:39 -0400 Subject: [Bioperl-guts-l] [Bug 2857] [TODO] NextGen sequencing updates In-Reply-To: Message-ID: <200907271252.n6RCqdZ3001150@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2857 ------- Comment #1 from cjfields at bioperl.org 2009-07-27 08:52 EST ------- Support for FASTQ added but need to address a few bugs. Consistency with output of second header: http://lists.open-bio.org/pipermail/bioperl-l/2009-July/030698.html Consistency when converting to Illumina 1.3: http://lists.open-bio.org/pipermail/bioperl-l/2009-July/030700.html Solexa conversions: http://lists.open-bio.org/pipermail/bioperl-l/2009-July/030699.html -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Jul 27 09:24:16 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 27 Jul 2009 09:24:16 -0400 Subject: [Bioperl-guts-l] [Bug 2886] New: Bioperl-network parsing error Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2886 Summary: Bioperl-network parsing error Product: BioPerl Version: unspecified Platform: PC OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: bioperl-network AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: sirisha at mycib.ac.uk I'm trying to use Bio::Network (version 1.6.0) to parse the PSI-MI v2.5 file "HPRD_SINGLE_PSIMI_070609.xml" which I've downloaded from the HPRD website (http://www.hprd.org/download). The parsing script gives the following errors: No fullName for interactor Aldehyde dehydrogenase 1 Use of uninitialized value in string eq at /opt/bioperl_1.6.0/lib/perl5//Bio/Network/IO/psi25.pm line 376. Use of uninitialized value in string eq at /opt/bioperl_1.6.0/lib/perl5//Bio/Network/IO/psi25.pm line 376. Use of uninitialized value in string eq at /opt/bioperl_1.6.0/lib/perl5//Bio/Network/IO/psi25.pm line 376. Use of uninitialized value in string eq at /opt/bioperl_1.6.0/lib/perl5//Bio/Network/IO/psi25.pm line 376. Use of uninitialized value in string eq at /opt/bioperl_1.6.0/lib/perl5//Bio/Network/IO/psi25.pm line 376. Use of uninitialized value in string eq at /opt/bioperl_1.6.0/lib/perl5//Bio/Network/IO/psi25.pm line 376. Segmentation fault I've tried with numerous other PSI-MI v2.5 files, and the only ones that "work" are those from MINT - all the others give the same "Use of uninitialized value" error as above. Other files I've tried are from DIP, BioGRID and MPact. My Perl is v5.8.8 built for x86_64-linux-thread-multi. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From maj at dev.open-bio.org Mon Jul 27 09:28:47 2009 From: maj at dev.open-bio.org (Mark Allen Jensen) Date: Mon, 27 Jul 2009 09:28:47 -0400 Subject: [Bioperl-guts-l] [15888] bioperl-live/trunk/Bio/Search/Tiling/MapTiling.pm: patch from Jin - thanks! Message-ID: <200907271328.n6RDSk2N012246@dev.open-bio.org> Revision: 15888 Author: maj Date: 2009-07-27 09:28:45 -0400 (Mon, 27 Jul 2009) Log Message: ----------- patch from Jin - thanks! Modified Paths: -------------- bioperl-live/trunk/Bio/Search/Tiling/MapTiling.pm Modified: bioperl-live/trunk/Bio/Search/Tiling/MapTiling.pm =================================================================== --- bioperl-live/trunk/Bio/Search/Tiling/MapTiling.pm 2009-07-24 19:25:33 UTC (rev 15887) +++ bioperl-live/trunk/Bio/Search/Tiling/MapTiling.pm 2009-07-27 13:28:45 UTC (rev 15888) @@ -176,7 +176,7 @@ $self->_set_attributes(); $self->{"_algorithm"} = $hit->algorithm; - my @hsps; + my @hsps = $hit->hsps; # apply filter function if requested if ( defined $filter ) { if ( ref($filter) eq 'CODE' ) { @@ -186,9 +186,6 @@ $self->warn("-filter is not a coderef; ignoring"); } } - else { - @hsps = $hit->hsps; - } # identify available contexts for my $t qw( query hit ) { From bugzilla-daemon at portal.open-bio.org Tue Jul 28 11:00:15 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 28 Jul 2009 11:00:15 -0400 Subject: [Bioperl-guts-l] [Bug 2888] New: When reading blast results: spaces at end of line in multi-line query description are lost Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2888 Summary: When reading blast results: spaces at end of line in multi-line query description are lost Product: BioPerl Version: 1.6 branch Platform: Other OS/Version: Linux Status: NEW Severity: minor Priority: P2 Component: Bio::Search/Bio::SearchIO AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: eyal.privman at gmail.com I use Bio::SearchIO from BioPerl version 1.6 to read results of NCBI BLAST version 2.2.16, where the description of the query sequence (that was originally the fasta header line) was broken to several lines. See example below. When I use: $result = $blastIO->next_result $result->query_description I get the description as one string, but no space is separation the last word on the line from the first word on the next line. (In the example below "gene" and "for" will be "genefor") This doesn't happen with bioperl 1.5.2 - I get the description with a space between the two words. Example blast input: TBLASTX 2.2.16 [Mar-25-2007] Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402. Query= dbj|AB027451.1|_TRUNC_466_3843 Saccharomyces bayanus HO gene for endonuclease for mating-type conversion, complete cds (3378 letters) ... ... ... -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Jul 28 11:26:56 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 28 Jul 2009 11:26:56 -0400 Subject: [Bioperl-guts-l] [Bug 2888] When reading blast results: spaces at end of line in multi-line query description are lost In-Reply-To: Message-ID: <200907281526.n6SFQuTF028567@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2888 eyal.privman at gmail.com changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |eyal.privman at gmail.com Status|NEW |RESOLVED Resolution| |LATER ------- Comment #1 from eyal.privman at gmail.com 2009-07-28 11:26 EST ------- Checked it again and now I see that it does add a space in some cases. I'm not sure that I understand this correctly. I need to do some more testing... Sorry. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Jul 28 11:34:56 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 28 Jul 2009 11:34:56 -0400 Subject: [Bioperl-guts-l] [Bug 2888] When reading blast results: spaces at end of line in multi-line query description are lost In-Reply-To: Message-ID: <200907281534.n6SFYuaX028816@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2888 ------- Comment #2 from eyal.privman at gmail.com 2009-07-28 11:34 EST ------- Created an attachment (id=1351) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1351&action=view) Example blast input file with long query description In this example $result->query_description addes a space between the first and second lines, but not between the second and third lines. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Jul 28 11:39:51 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 28 Jul 2009 11:39:51 -0400 Subject: [Bioperl-guts-l] [Bug 2888] When reading blast results: spaces at end of line in multi-line query description are lost In-Reply-To: Message-ID: <200907281539.n6SFdpPl029010@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2888 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|RESOLVED |REOPENED Resolution|LATER | ------- Comment #3 from cjfields at bioperl.org 2009-07-28 11:39 EST ------- Um, wow. That's a bit long for a query name. Regardless, we can probably do something about it; I can see the spot where this might be going wrong, so we can possibly get this in for the next release. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Jul 28 11:40:37 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 28 Jul 2009 11:40:37 -0400 Subject: [Bioperl-guts-l] [Bug 2888] When reading blast results: spaces at end of line in multi-line query description are lost In-Reply-To: Message-ID: <200907281540.n6SFebQk029071@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2888 ------- Comment #4 from eyal.privman at gmail.com 2009-07-28 11:40 EST ------- OK, now I found where I see this problem (in the example I wrote in the original description of the bug I copied a blast input that *doesn't* produce the bug). I attached an example input file where $result->query_description adds a space between the first and second lines, but not between the second and third lines. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Jul 28 11:42:10 2009 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 28 Jul 2009 11:42:10 -0400 Subject: [Bioperl-guts-l] [Bug 2888] When reading blast results: spaces at end of line in multi-line query description are lost In-Reply-To: Message-ID: <200907281542.n6SFgAs0029137@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2888 ------- Comment #5 from eyal.privman at gmail.com 2009-07-28 11:42 EST ------- (In reply to comment #3) > Um, wow. That's a bit long for a query name. Regardless, we can probably do > something about it; I can see the spot where this might be going wrong, so we > can possibly get this in for the next release. > Thanks! That would be great. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From chmille4 at dev.open-bio.org Wed Jul 29 09:35:30 2009 From: chmille4 at dev.open-bio.org (Chase Miller) Date: Wed, 29 Jul 2009 09:35:30 -0400 Subject: [Bioperl-guts-l] [15889] bioperl-dev/trunk: Turned Bio::Nexml::Util module into Bio::Nexml:: Factory module. Message-ID: <200907291335.n6TDZUnw021813@dev.open-bio.org> Revision: 15889 Author: chmille4 Date: 2009-07-29 09:35:29 -0400 (Wed, 29 Jul 2009) Log Message: ----------- Turned Bio::Nexml::Util module into Bio::Nexml::Factory module. Adressed some comments about coding style from Mark Jenson Modified Paths: -------------- bioperl-dev/trunk/Bio/AlignIO/nexml.pm bioperl-dev/trunk/Bio/Nexml/Util.pm bioperl-dev/trunk/Bio/Nexml.pm bioperl-dev/trunk/Bio/SeqIO/nexml.pm bioperl-dev/trunk/Bio/TreeIO/nexml.pm bioperl-dev/trunk/t/AlignIO/nexml.t bioperl-dev/trunk/t/SeqIO/nexml.t bioperl-dev/trunk/t/TreeIO/nexml.t bioperl-dev/trunk/t/nexml.t Added Paths: ----------- bioperl-dev/trunk/Bio/Nexml/Factory.pm Modified: bioperl-dev/trunk/Bio/AlignIO/nexml.pm =================================================================== --- bioperl-dev/trunk/Bio/AlignIO/nexml.pm 2009-07-27 13:28:45 UTC (rev 15888) +++ bioperl-dev/trunk/Bio/AlignIO/nexml.pm 2009-07-29 13:35:29 UTC (rev 15889) @@ -67,6 +67,7 @@ use Bio::Phylo::IO qw(parse unparse); use Bio::LocatableSeq; use Bio::Nexml::Util; +use Benchmark; use base qw(Bio::AlignIO); @@ -94,6 +95,13 @@ #Add sub rewind? +sub benchmark_parse { + my $aln = next_aln(@_); + my $self = shift; + $self->{'_parsed'} = 0; + return $aln; +} + sub _parse { my ($self) = @_; Added: bioperl-dev/trunk/Bio/Nexml/Factory.pm =================================================================== --- bioperl-dev/trunk/Bio/Nexml/Factory.pm (rev 0) +++ bioperl-dev/trunk/Bio/Nexml/Factory.pm 2009-07-29 13:35:29 UTC (rev 15889) @@ -0,0 +1,537 @@ +# $Id: Util.pm 15875 2009-07-21 19:20:00Z chmille4 $ +# +# BioPerl module for Bio::Nexml::Factory +# +# Please direct questions and support issues to +# +# Cared for by Chase Miller +# +# Copyright Chase Miller +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Nexml::Factory - A factory module for creating BioPerl and Bio::Phylo objects from/to nexml documents + +=head1 SYNOPSIS + + Do not use this module directly. It shoulde be used through + Bio::Nexml, Bio::SeqIO::nexml, Bio::AlignIO::nexml, or + Bio::TreeIO::nexml + + +=head1 DESCRIPTION + +This is a factory/utility module in the nexml namespace. It contains methods +that are needed by multiple modules. + +A few key design issues pertaining to this module will be +described here. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l at bioperl.org - General discussion + http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +L + +rather than to the module maintainer directly. Many experienced and +reponsive experts will be able look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +the web: + + http://bugzilla.open-bio.org/ + +=head1 AUTHOR - Chase Miller + +Email chmille4 at gmail.com + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +#Let the code begin + +package Bio::Nexml::Factory; + +use strict; + +use Bio::Phylo::Matrices::Matrix; +use Bio::Phylo::Matrices::Datatype::Rna; +use Bio::SeqFeature::Generic; + + +use base qw(Bio::Root::Root); + +my $fac = Bio::Phylo::Factory->new(); + + + +sub new { + my($class, at args) = @_; + my $self = $class->SUPER::new(@args); +} + +sub create_bperl_aln { + my ($self, $proj) = @_; + my ($start, $end, $seq, $desc); + my $taxa = $proj->get_taxa(); + my $matrices = $proj->get_matrices(); + my @alns; + + foreach my $matrix (@$matrices) + { + my $aln = Bio::SimpleAlign->new(); + + #check if mol_type is something that makes sense to be a seq + my $mol_type = lc($matrix->get_type()); + unless ($mol_type eq 'dna' || $mol_type eq 'rna' || $mol_type eq 'protein') + { + next; + # something for the back-burner: BioPerl has objects + # to handle arbitrary genotypes; might be cool to + # be able to create something besides alignments + # here .../maj + } + + my $basename = $matrix->get_name(); + $aln->id($basename); + + my $rows = $matrix->get_entities(); + my $seqNum = 0; + foreach my $row (@$rows) + { + my $newSeq = $row->get_char(); + my $rowlabel; + $seqNum++; + + #constuct seqID based on matrix label and row id + my $seqID = "$basename.row_$seqNum"; + + #Check if theres a row label and if not default to seqID + if( !defined($rowlabel = $row->get_name())) {$rowlabel = $seqID;} + + $seq = Bio::LocatableSeq->new( + -seq => $newSeq, + -display_id => "$rowlabel", + #-description => $desc, + -alphabet => $mol_type, + ); + my $feat; + #check if taxon linked to sequence if so create feature to attach to alignment + foreach my $taxa_o (@$taxa) + { + my $taxa_ents = $taxa_o->get_entities(); + foreach my $taxon (@$taxa_ents) + { + if($taxon eq $row->get_taxon) + { + my $taxon_name = $taxon->get_name(); + $feat = Bio::SeqFeature::Generic->new(); + $feat->add_tag_value('taxon', "$taxon_name"); + $feat->add_tag_value('id', "$seqID"); + } + } + } + + $aln->add_seq($seq); + $aln->add_SeqFeature($feat); + $self->debug("Reading r$seqID\n"); + + + } + push (@alns, $aln); + } + return \@alns; +} +#PODPODPOD +sub create_bperl_tree { + my($self, $proj) = @_; + my @trees; + #my $taxa = $proj->get_taxa(); + my $forests = $proj->get_forests(); + + foreach my $forest (@$forests) + { + my $basename = $forest->get_name(); + my $taxa = $forest->get_taxa(); + my $trees = $forest->get_entities(); + + foreach my $t (@$trees) + { + my %created_nodes; + my $tree_id = $t->get_name(); + my $tree = Bio::Tree::Tree->new(-id => "$basename.$tree_id"); + + + + #process terminals only, removing terminals as they get processed + #which inturn creates new terminals to process until the entire tree has been processed + my $terminals = $t->get_terminals(); + for(my $i=0; $i<@$terminals; $i++) + { + my $terminal = $$terminals[$i]; + my $new_node_id = $terminal->get_name(); + my $newNode; + + if(exists $created_nodes{$new_node_id}) + { + $newNode = $created_nodes{$new_node_id}; + } + else + { + $newNode = Bio::Tree::Node->new(-id => $new_node_id); + $created_nodes{$new_node_id} = $newNode; + } + + #transfer attributes that apply to all nodes + #check if taxa data exists for the current node ($terminal) + my $taxa_ents = $taxa->get_entities(); + foreach my $taxon (@$taxa_ents) + { + if($taxon eq $terminal->get_taxon()) { + $newNode->add_tag_value("taxon", $taxon->get_name()); + } + } + + #check if you've reached the root of the tree and if so, stop. + if($terminal->is_root()) { + $tree->set_root_node($newNode); + last; + } + + #transfer attributes that apply to non-root only nodes + $newNode->branch_length($terminal->get_branch_length()); + + my $parent = $terminal->get_parent(); + my $parentID = $parent->get_name(); + if(exists $created_nodes{$parentID}) + { + $created_nodes{$parentID}->add_Descendent($newNode); + } + else + { + my $parent_node = Bio::Tree::Node->new(-id => $parentID); + $parent_node->add_Descendent($newNode); + $created_nodes{$parentID} = $parent_node; + } + #remove processed node from tree + $parent->prune_child($terminal); + + #check if the parent of the removed node is now a terminal node and should be added for processing + if($parent->is_terminal()) + { + push(@$terminals, $terminal->get_parent()); + } + } + push @trees, $tree; + } + } + return \@trees; +} + +sub create_bperl_seq { + my($self, $proj) = @_; + my $matrices = $proj->get_matrices(); + my $taxa = $proj->get_taxa(); + my @seqs; + + foreach my $matrix (@$matrices) + { + #check if mol_type is something that makes sense to be a seq + my $mol_type = lc($matrix->get_type()); + unless ($mol_type eq 'dna' || $mol_type eq 'rna' || $mol_type eq 'protein') + { + next; + } + + my $rows = $matrix->get_entities(); + my $seqnum = 0; + my $basename = $matrix->get_name(); + foreach my $row (@$rows) + { + my $newSeq = $row->get_char(); + + $seqnum++; + #construct full sequence id by using bio::phylo "matrix label" and "row id" + my $seqID = "$basename.seq_$seqnum"; + my $rowlabel; + #check if there is a label for the row, if not default to seqID + if (!defined ($rowlabel = $row->get_name())) {$rowlabel = $seqID;} + + + #build the seq object using the factory create method + + my $seqbuilder = new Bio::Seq::SeqFactory('-type' => 'Bio::Seq'); + + my $seq = $seqbuilder->create( + -seq => $newSeq, + -id => $rowlabel, + -primary_id => $seqID, + #-desc => $fulldesc, + -alphabet => $mol_type, + -direct => 1, + ); + #check if taxon linked to sequence if so create feature to attach to alignment + my $feat; + foreach my $taxa_o (@$taxa) + { + my $taxa_ents = $taxa_o->get_entities(); + foreach my $taxon (@$taxa_ents) + { + if($taxon eq $row->get_taxon) + { + my $taxon_name = $taxon->get_name(); + $feat = Bio::SeqFeature::Generic->new(); + $feat->add_tag_value('taxon', "$taxon_name"); + $feat->add_tag_value('id', $seqID); + last; + } + } + } + $seq->add_SeqFeature($feat); + push (@seqs, $seq); + #what other data is appropriate to pull over from bio::phylo::matrices::matrix?? + } + } + return \@seqs; +} + +sub create_bphylo_tree { + my ($self, $bptree) = @_; + #most of the code below ripped form Bio::Phylo::Forest::Tree::new_from_bioperl()d + + my $tree = $fac->create_tree; + my $taxa = $fac->create_taxa; + + my $class = 'Bio::Phylo::Forest::Tree'; + + if ( Scalar::Util::blessed $bptree && $bptree->isa('Bio::Tree::TreeI') ) { + bless $tree, $class; + ($tree, $taxa) = _copy_tree( $tree, $bptree->get_root_node, "", $taxa); + + # copy name + my $name = $bptree->id; + $tree->set_name( $name ) if defined $name; + + # copy score + my $score = $bptree->score; @@ Diff output truncated at 10000 characters. @@