[Bioperl-l] Format Validator

arshad mohammed arshad25 at gmail.com
Sat May 24 03:36:54 UTC 2008


Hi All,

I am putting some effort into writing a submodule to validate bio sequences,
as my first attempt to contribute something to bioperl.
It is still at an early stage, as it can validate only the FASTA format so
far. I would really like to have some feedback from all of you to improve
and bioperlify it.
Following are the module and a snippet showing how to use it.

##############################
#############################
##FormatValidator.pm

package FormatValidator;
use strict;
use Carp;
use warnings;
use version;
use Scalar::Util qw(openhandle);
our $VERSION = qv('0.0.1');

# Constructor. The validator keeps no state, so the object is just an empty
# blessed hash.
#
# Returns: a new FormatValidator object.
sub new
{
    my ($class) = @_;

    my $object = bless {}, $class;
    return $object;
}

# Report whether the given sequence data looks like FASTA.
#
# Parameters:
#   $file - an open filehandle or a reference to an array of lines.
#
# Returns (always a single scalar):
#   1     - the first non-blank line starts with '>'
#   0     - $file is undefined, or the data does not start with a '>' line
#   undef - no lines could be obtained (unsupported argument or empty input)
sub is_fasta
{
    my ($self, $file) = @_;
    return 0 if !defined $file;

    # The caller may pass a filehandle or an array reference; readfile()
    # hides that difference and hands back the lines either way.
    my @file_data = $self->readfile($file);

    # Explicit undef (not a bare return) so that the result is one scalar
    # in every context, like the 0/1 results below.
    return undef if !@file_data;

    for my $line (@file_data)
    {
        # Leading blank lines carry no format information; skip them.
        next if !defined $line || $line !~ /\S/;

        # '>' is the only FASTA marker checked here, so the first line with
        # content decides the answer and nothing further needs reading.
        return $line =~ /^>/ ? 1 : 0;
    }

    # There was input, but every line was blank.
    return 0;
}

# Read all lines from either a filehandle or an array reference.
#
# Parameters:
#   $file - an open filehandle (glob, glob reference or IO object), or a
#           reference to an array of lines.
#
# Returns: the list of lines. For any other argument a warning is issued
# via carp and an empty list is returned.
sub readfile
{
    my ($self, $file) = @_;

    # openhandle() recognises every kind of open handle; comparing ref()
    # against 'IO::Handle' would miss lexical filehandles, which are plain
    # GLOB references.
    if (defined $file && defined openhandle($file))
    {
        my @lines = <$file>;
        return @lines;
    }

    # Or if it is an array reference
    elsif (ref($file) eq 'ARRAY')
    {
        return @{$file};
    }

    # Anything else cannot be read.
    carp "I can read only Array reference or File Handler, But this is something else !\n";
    return;
}

1;
##################################
##################################

#test.pl

use FormatValidator;
use strict;
use warnings;

# Demonstration script: runs FormatValidator->is_fasta against a file handle,
# against an array of lines, and against an unsupported argument.

my $sequence_file = 'test.txt';    # sequence file
my $validator     = FormatValidator->new();

# Low-precedence "or" is required here: with "||" the die would attach to
# the file name (always true) and a failed open would go unnoticed.
open my $fh, '<', $sequence_file
    or die "couldnt Open $sequence_file: $!";

#pass either file handler
if ($validator->is_fasta(*{$fh}{IO}))
{
    print "Its a FASTA..\n";
}
else
{
    print "Its not a FASTA\n";
}
close $fh;

# or the array reference of the file content
open $fh, '<', $sequence_file
    or die "couldnt Open $sequence_file: $!";
my @file_data = <$fh>;
close $fh;

if ($validator->is_fasta(\@file_data))
{
    print "Its a FASTA..\n";
}
else
{
    print "Its not a FASTA\n";
}

#Test for an invalid condition: the validator object itself is neither a
#file handle nor an array reference, so is_fasta is expected to give undef.
my $invalid = $validator->is_fasta($validator);

if (!defined $invalid)
{
    print "DIE..\n";
}
else
{
    print "Something Wrong in my module";
}

Perl ly
Arshad Mohammed
-- 
                 \\\|///
                \\ - - //
               ( @ @ )
--------o00o-(_)-o00o-----------



More information about the Bioperl-l mailing list