#use strict;
#use warnings;

use Bio::Phenotype::OMIM::OMIMparser;
use Bio::Annotation::Reference;
use Bio::Phenotype::OMIM::OMIMentryAllelicVariant;


###################################################################################################################
### This script parses OMIM data and prints the extracted fields to STDOUT, ready for loading	###
###################################################################################################################
### Version 1.0 												###
### Dated 1st August 2007											###
### Author : Neeti Somaiya											###
###################################################################################################################

# ---------------------------------------------------------------------------
# Main program.
#
# Usage: perl <this_script> <omim_text_file>
#
# Hands the file named by the first command-line argument to the OMIM
# parser, then passes every entry the parser yields to each get* extractor
# defined below.  The values the extractors return are not used further
# here; any per-field output comes from the extractors themselves.  A row of
# asterisks is printed after each entry and START/END timestamps bracket
# the whole run.
# ---------------------------------------------------------------------------

# Take the path literally.  The earlier `<$ARGV[0]>` form is a glob, not a
# plain read of the argument: it splits the value on whitespace and expands
# wildcard characters, so some perfectly valid file names were mangled.
my $file = $ARGV[0];
die "Usage: $0 <omim_text_file>\n" unless defined($file) && $file ne "";

my $count = 0;    # number of entries processed so far

my $now = localtime;
print "\nSTART : $now\n";

my $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new(-omimtext => $file);

while ( my $omim_entry = $omim_parser->next_phenotype() ) {
    $count++;

    my $numb              = getOmimId($omim_entry);
    my $title             = getOmimTitle($omim_entry);
    my $alt               = getAltTitles($omim_entry);
    my $desc              = getDescription($omim_entry);
    my $create_date       = getCreateDate($omim_entry);
    my $last_update_date  = getLastUpdateDate($omim_entry);
    my $clinical_symptoms = getClinicalSymptoms($omim_entry);
    my $pubmed_ids        = getPubmedIds($omim_entry);
    my $allelic_variants  = getAllelicVariants($omim_entry);

    # Visual separator between consecutive entries.
    print "\n********************\n";

    #last if ($count>10);
}

$now = localtime;
print "\nEND : $now\n";

# getOmimId($omim_entry)
#
# Returns the identifier of one entry: its MIM number, prefixed with "#"
# when more_than_two_genes() is 1 and with "*" when is_separate() is 1.
# The identifier is also printed as "OMIM ID : ...".
#
# When the entry has no MIM number the NULL marker "\N" is returned and
# nothing is printed.
sub getOmimId {
    my $omim_entry = shift;

    my $numb = $omim_entry->MIM_number();    # *FIELD* NO

    # Decide on the NULL marker before any prefix is attached; otherwise a
    # flagged entry without a number would come back as a bare "#" or "*".
    if ( !defined($numb) || $numb eq "" ) {
        return ("\\N");
    }

    my $mtt = $omim_entry->more_than_two_genes();    # "#" before title
    if ( defined($mtt) && $mtt == 1 ) {
        $numb = "#" . $numb;
    }

    my $sep = $omim_entry->is_separate();            # "*" before title
    if ( defined($sep) && $sep == 1 ) {
        $numb = "*" . $numb;
    }

    print "\nOMIM ID : $numb\n";
    return ($numb);
}

# getOmimTitle($omim_entry)
#
# Returns the entry title (*FIELD* TI, first line) with a leading MIM
# number removed, and prints it as "TITLE : ...".  Returns the NULL marker
# "\N" (printing nothing) when the title is empty.
sub getOmimTitle {
    my $omim_entry = shift;

    my $title = $omim_entry->title();    # *FIELD* TI - first line
    $title = "" unless defined($title);

    # Strip only a number that sits at the very start of the title,
    # optionally preceded by one marker character.  The match is anchored
    # on purpose: an unanchored pattern would also fire on a digit in the
    # middle of a title (e.g. "... TYPE 2 ...") and cut the title short.
    if ( $title =~ /\A\s*[*#+%^]?\s*\d+\s(.*)/ ) {
        $title = $1;
    }

    if ( $title ne "" ) {
        print "\nTITLE : $title\n";
    }
    else {
        $title = "\\N";
    }
    return ($title);
}

# getAltTitles($omim_entry)
#
# Flattens the alternative titles and symbols (*FIELD* TI, additional lines)
# into one line: every ";;" separator becomes "##", all newlines are dropped
# and a single leading "##" is removed.  The result is printed as
# "ALTERNATIVE TITLES : ..."; an empty result is returned as "\N" instead
# and nothing is printed.
sub getAltTitles {
    my $omim_entry = shift;

    my $raw = $omim_entry->alternative_titles_and_symbols();

    # split/join (rather than a substitution) so that trailing empty
    # fields are discarded exactly as split does.
    my $flat = join( "\#\#", split( /\;\;/, $raw ) );

    # Removing every newline glues the physical lines together.
    $flat =~ tr/\n//d;

    # Drop one leading separator, if present.
    $flat =~ s/^\#\#//;

    return "\\N" if $flat eq "";

    print "\nALTERNATIVE TITLES : $flat\n";
    return $flat;
}

# getDescription($omim_entry)
#
# Collapses the description text (*FIELD* TX) onto a single line: blank-line
# paragraph breaks become two spaces and every remaining newline is removed
# without a replacement.  The result is printed as "DESCRIPTION : ...";
# an empty result is returned as "\N" and nothing is printed.
sub getDescription {
    my $omim_entry = shift;

    my $text = $omim_entry->description();

    # Paragraphs first; split also discards trailing empty paragraphs.
    my $flat = join( "  ", split( /\n\n/, $text ) );

    # Then the line breaks inside each paragraph.
    $flat =~ tr/\n//d;

    return "\\N" if $flat eq "";

    print "\nDESCRIPTION : $flat\n";
    return $flat;
}

# getCreateDate($omim_entry)
#
# Takes the creation record (*FIELD* CD), keeps whatever follows the first
# ": " on a line, and passes that through formatDate().  A non-empty result
# is printed as "CREATE DATE : ..." and returned; otherwise "\N" is
# returned and nothing is printed.
sub getCreateDate {
    my $omim_entry = shift;

    my $created = $omim_entry->created();

    # Keep only the part after "<something>: " when that shape is present.
    $created = $1 if $created =~ /.*?\:\s(.*)/;

    my $formatted = formatDate($created);

    return "\\N" if $formatted eq "";

    print "\nCREATE DATE : $formatted\n";
    return $formatted;
}

# getLastUpdateDate($omim_entry)
#
# Splits the contributors record (*FIELD* CN) on ": " and passes the final
# piece through formatDate().  A non-empty result is printed as
# "LAST UPDATE DATE : ..." and returned; otherwise "\N" is returned and
# nothing is printed.
#
# NOTE(review): this uses the date on the LAST contributor line.  Whether
# that is the most recent update depends on the order of lines in the
# source file - confirm against real data.
sub getLastUpdateDate {
    my $omim_entry = shift;

    my $contributors = $omim_entry->contributors();

    # The text after the final ": " is what gets treated as the date.
    my @pieces    = split( /\: /, $contributors );
    my $formatted = formatDate( $pieces[-1] );

    return "\\N" if $formatted eq "";

    print "\nLAST UPDATE DATE : $formatted\n";
    return $formatted;
}

# getClinicalSymptoms($omim_entry)
#
# Serialises the clinical symptoms (*FIELD* CS) of one entry.  The entry's
# clinical_symptoms() value is read as a hash reference of
# category => list of symptoms; each category becomes
# "category:symptom1,symptom2" and the categories are joined with "##".
# Categories are emitted in sorted order so repeated runs produce identical
# output.  The result is printed as "CLINICAL SYMPTOMS : ..."; when there
# is nothing to report "\N" is returned and nothing is printed.
sub getClinicalSymptoms {
    my $omim_entry = shift;

    my $symptoms = $omim_entry->clinical_symptoms;    # *FIELD* CS
    my @all      = ();

    if ( ref($symptoms) eq 'HASH' ) {

        # Walk the keys and look each value up directly, so a category and
        # its symptoms can never drift out of step with each other.
        foreach my $category ( sort keys %{$symptoms} ) {
            my $value = $symptoms->{$category};

            # Normally an array ref; a plain scalar is tolerated and
            # treated as a one-item list.
            my @items =
                ref($value) eq 'ARRAY' ? @{$value}
              : defined($value)        ? ($value)
              :                          ();

            push( @all, $category . ":" . join( ",", @items ) );
        }
    }

    my $clinical_symptoms = join( "##", @all );
    if ( $clinical_symptoms ne "" ) {
        print "\nCLINICAL SYMPTOMS : $clinical_symptoms\n";
    }
    else {
        $clinical_symptoms = "\\N";
    }
    return ($clinical_symptoms);
}
	
# getPubmedIds($omim_entry)
#
# Collects the pubmed value of every reference of the entry (*FIELD* RF),
# skipping empty ones, and joins them with "##".  The result is printed as
# "PUBMED : ..."; when no id is found "\N" is returned and nothing is
# printed.
sub getPubmedIds {
    my $omim_entry = shift;

    # each_Reference() yields the reference objects; pubmed is called in
    # scalar context on each of them.
    my @ids =
      grep { $_ ne "" }
      map  { scalar $_->pubmed } $omim_entry->each_Reference();

    my $joined = join( "\#\#", @ids );

    return "\\N" if $joined eq "";

    print "\nPUBMED : $joined\n";
    return $joined;
}

# getAllelicVariants($omim_entry)
#
# Serialises the allelic variants of one entry (*FIELD* AV).  Each variant
# becomes "TITLE : t,SYMBOL : s,DESCRIPTION : d", where empty fields are
# left out and newlines inside the description are removed; variants are
# joined with "##".  Like the other extractors in this file, the result is
# printed ("ALLELIC VARIANTS : ...") and returned, and "\N" is returned
# (printing nothing) when there is nothing to report.
sub getAllelicVariants {
    my $omim_entry = shift;

    my @all = ();
    foreach my $av ( $omim_entry->each_AllelicVariant() ) {
        my @parts = ();

        my $tit = $av->title;
        push( @parts, "TITLE : $tit" ) if defined($tit) && $tit ne "";

        my $sym = $av->symbol;
        push( @parts, "SYMBOL : $sym" ) if defined($sym) && $sym ne "";

        my $des = $av->description;
        $des = "" unless defined($des);
        $des =~ tr/\n//d;    # collapse the description onto one line
        push( @parts, "DESCRIPTION : $des" ) if $des ne "";

        # Joining the parts avoids a dangling comma when a trailing field
        # is missing; a variant with no data at all adds no record.
        push( @all, join( ",", @parts ) ) if @parts;
    }

    my $allelic_variants = join( "##", @all );
    if ( $allelic_variants ne "" ) {
        print "\nALLELIC VARIANTS : $allelic_variants\n";
    }
    else {
        $allelic_variants = "\\N";
    }
    return ($allelic_variants);
}

# formatDate($date)
#
# Converts the first month/day/year date found in $date into
# "year-MM-DD", with month and day zero-padded to two digits and the year
# kept exactly as written.  Returns the empty string when $date is
# undefined or contains no such date.
sub formatDate {
    my $date = shift;

    return ("") unless defined($date);

    # Digits only, and not starting in the middle of a longer number, so
    # surrounding text can never leak into the month or the year.
    if ( $date =~ m{(?<!\d)(\d{1,2})/(\d{1,2})/(\d+)} ) {
        my ( $month, $day, $year ) = ( $1, $2, $3 );

        # %02d pads single digits and leaves an already padded value such
        # as "06" at two digits instead of turning it into "006".
        return ( sprintf( "%s-%02d-%02d", $year, $month, $day ) );
    }

    return ("");
}
	
