[Bioperl-l] Bio::Assembly::IO::ace with CAP3 ace files

Ewan Birney birney@ebi.ac.uk
Tue, 24 Dec 2002 11:47:34 +0000 (GMT)


On Mon, 23 Dec 2002, Marc Logghe wrote:

> Hi,
> I had to hack into Bio::Assembly::IO::ace a little to make it work with ace
> files generated by CAP3 and to allow handling of multiple assemblies per
> file.
> The diff is included in case somebody would need it also (revision 1.1 is
> the initial commit on bioperl-live). I hope I did not break anything; did
> not test it with phrap ace files.
> Great module, I especially like the change_coord() method to change between
> the 4 coordinate systems. Really cool !!! Thank you Robson F. de Souza !

Ideally before applying this I would like to get it ok'd by someone who
use the Bio::Assembly::IO::ace (ideally Robson himself I guess) as I don't
know what is going on there. If not, I am tempted to leave this until
1.3/4 series


any other views?


> Cheers,
> Marc
>
>
> ===================================================================
> RCS file: /usr/local/cvs/scripts/ace.pm,v
> retrieving revision 1.1
> retrieving revision 1.3
> diff -u -r1.1 -r1.3
> --- scripts/ace.pm	2002/12/23 06:12:19	1.1
> +++ scripts/ace.pm	2002/12/23 18:52:48	1.3
> @@ -1,4 +1,4 @@
> -# $Id: ace.pm,v 1.1 2002/12/23 06:12:19 marcl Exp $
> +# $Id: ace.pm,v 1.3 2002/12/23 18:52:48 marcl Exp $
>  #
>  ## BioPerl module for Bio::Assembly::IO::ace
>  #
> @@ -118,24 +118,26 @@
>  sub next_assembly {
>      my $self = shift; # Object reference
>      local $/="\n";
> +    my $AS_flag;
>
>      # Resetting assembly data structure
>      my $assembly = Bio::Assembly::Scaffold->new(-source=>'phrap');
> -
>      # Looping over all ACE file lines
>      my ($contigOBJ,$read_name);
>      my $read_data = {}; # Temporary holder for read data
> +    READLINE:
>      while ($_ = $self->_readline) { # By now, ACE files hold a single
> assembly
>  	chomp;
>
>  	# Loading assembly information (ASsembly field)
> -#	(/^AS (\d+) (\d+)/) && do {
> +	(/^AS (\d+) (\d+)/) && do {
> +            last READLINE if $AS_flag++;
>  #	    $assembly->_set_nof_contigs($1);
>  #	    $assembly->_set_nof_sequences_in_contigs($2);
> -#	};
> +	};
>
>  	# Loading contig sequence (COntig sequence field)
> -	(/^CO Contig(\d+) (\d+) (\d+) (\d+) (\w+)/) && do { # New contig
> found!
> +	(/^CO (.*Contig\d+) (\d+) (\d+) (\d+) (\w+)/) && do { # New contig
> found!
>  	    my $contigID = $1;
>  	    $contigOBJ = Bio::Assembly::Contig->new(-source=>'phrap',
> -id=>$contigID);
>  #	    $contigOBJ->set_nof_bases($2); # Contig length in base pairs
> @@ -197,7 +199,7 @@
>  	};
>
>  	# Loading read info... (Assembled From field)
> -	/^AF (\S+) (C|U) (-*\d+)/ && do {
> +	/^AF (\S+).* (C|U) (-*\d+)$/ && do {
>  	    $read_name = $1; my $ori = $2;
>  	    $read_data->{$read_name}{'padded_start'} = $3; # aligned start
>  	    $ori = ( $ori eq 'U' ? 1 : -1);
> @@ -212,7 +214,7 @@
>  #	};
>
>  	# Loading reads... (ReaD sequence field
> -	/^RD (\S+) (-*\d+) (\d+) (\d+)/ && do {
> +	/^RD (\S+).* (-*\d+) (\d+) (\d+)$/ && do {
>  	    $read_name = $1;
>  	    $read_data->{$read_name}{'length'} = $2; #
> number_of_padded_bases
>  	    $read_data->{$read_name}{'contig'} = $contigOBJ;
> @@ -243,7 +245,6 @@
>  						      );
>  	    $contigOBJ->set_seq_coord($coord,$read);
>  	};
> -
>  	# Loading read trimming and alignment ranges...
>  	/^QA (-?\d+) (-?\d+) (-?\d+) (-?\d+)/ && do {
>  	    my $qual_start  = $1; my $qual_end  = $2;
> @@ -357,6 +358,8 @@
>  	};
>
>      } # while ($_ = $self->_readline)
> +
> +    return undef unless ($assembly->all_contigs); # otherwise endless loop
>
>      $assembly->update_seq_list();
>      return $assembly;
>
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l@bioperl.org
> http://bioperl.org/mailman/listinfo/bioperl-l
>