package NucSubstModel;
use strict;
use warnings;
use List::Util qw(sum);
use Bio::Root::Root;
use Bio::Tools::IUPAC;
use vars qw(@ISA);
@ISA = qw(Bio::Root::Root);

sub new {
    my $class = shift;
    my $self = {
        'matrix' => [
        # TO: A     C     G     T
            [ 0.25, 0.25, 0.25, 0.25 ], # FROM: A
            [ 0.25, 0.25, 0.25, 0.25 ], # FROM: C
            [ 0.25, 0.25, 0.25, 0.25 ], # FROM: G
            [ 0.25, 0.25, 0.25, 0.25 ], # FROM: T
        ],
        'transl' => {
            'A'  => { 'id' => 0, 'freq' => 0.25 }, # ids are for matrix lookup
            'C'  => { 'id' => 1, 'freq' => 0.25 },
            'G'  => { 'id' => 2, 'freq' => 0.25 },
            'T'  => { 'id' => 3, 'freq' => 0.25 },
        },        
        'gshape' => 0, # gamma shap parameter alpha
        'pinvar' => 0, # proportion of invariant sites
        'mu'     => 1, # mutation rate paramter mu
    };
    bless $self, $class;
    return $self;
}

# gets/sets nucleotide frequencies, one at a time. Adjusts matrix.
# example: $model->freq( 'A' ) returns 0.25
# example: $model->freq( 'N' ) returns 1
# example: $model->freq( 'A' => 0.5 ) scales C, G, T downward.
# does not adjust when ambiguous symbols used, e.g.
# $model->freq( 'N' => 0.5 ) doesn't work
sub freq {
    my ( $self, $nuc, $freq ) = ( $_[0], uc $_[1], $_[2] );
    if ( exists $Bio::Tools::IUPAC::IUB{$nuc} ) {
        if ( exists $self->{'transl'}->{$nuc} ) {
            if ( $freq and $freq > 0 and $freq < 1 ) {
                my %tmp = ( $nuc => 1 );
                my $diff = $self->{'transl'}->{$nuc}->{'freq'} - $freq;
                $self->{'transl'}->{$nuc}->{'freq'} = $freq;            
                for ( 'A', 'C', 'G', 'T' ) {
                    next if $tmp{$_};
                    $self->{'transl'}->{$_}->{'freq'} += ( $diff / 3 );
                }
            }
            elsif ( $freq and ( $freq <= 0 or $freq >= 1 ) ) {
                $self->throw( "Frequency must be between 0 and 1" );
            }
            return $self->{'transl'}->{$nuc}->{'freq'};
        }
        else {
            my $disambiguated_freq = 0;
            foreach ( @{ $Bio::Tools::IUPAC::IUB{$nuc} } ) {
                $disambiguated_freq += $self->{'transl'}->{$_}->{'freq'};
            }
            return $disambiguated_freq;
        }
    }
    else {
        $self->throw( "Nucleotide \"$nuc\" does not exist" );
    }
}

# gets/sets substitution probabilities.
# example: $model->prob( 'A' => 'C' ) returns 0.25
# example: $model->prob( 'A' => 'N' ) returns 1
# example: $model->prob( 'A' => 'C', 0.5 ) scales others accordingly
# does not adjust when ambiguous symbols used, e.g.
# $model->prob( 'A' => 'N', 0.5 ) doesn't work
sub prob {
    my ( $self, $from, $to, $p ) = ( $_[0], uc $_[1], uc $_[2], $_[3] );
    if ( exists $self->{'transl'}{$from} and exists $self->{'transl'}{$to} ) {
        my ( $i, $j ) = ( $self->{'transl'}{$from}{'id'}, $self->{'transl'}{$to}{'id'} );
        if ( $p and $p > 0 and $p < 1 ) {
            my $mat = $self->{'matrix'};
            my $scale = $mat->[$i][$j] - $p;
            $mat->[$i][$j] = $p;
            for my $f ( 0 .. 3 ) {
                for my $t ( 0 .. 3 ) {
                    next if $f == $i and $t == $j;
                    $mat->[$f][$t] += ( $scale / 3 ) if $f == $i or $t == $j;
                }
            }
            for my $f ( 0 .. 3 ) {
                next if $f == $i;
                my $rowscale = 1 - sum @{ $mat->[$f] };
                for my $t ( 0 .. 3 ) {
                    next if $t == $j;
                    $mat->[$f][$t] += ( $rowscale / 3 );
                }
            }            
        }
        elsif ( $p and ( $p <= 0 or $p >= 1 ) ) {
            $self->throw( "Probability must be between 0 and 1" );
        }
        return $self->{'matrix'}[$i][$j];        
    }
    else {
        my $disambiguated_p = 0;
        if ( exists $Bio::Tools::IUPAC::IUB{$from} and exists $Bio::Tools::IUPAC::IUB{$to} ) {
            my $from_array = $Bio::Tools::IUPAC::IUB{$from};
            my $to_array = $Bio::Tools::IUPAC::IUB{$to};
            foreach my $from_sym ( @{ $from_array } ) {
                my $i = $self->{'transl'}{$from_sym}{'id'};
                foreach my $to_sym ( @{ $to_array } ) {
                    my $j = $self->{'transl'}{$to_sym}{'id'};
                    $disambiguated_p += $self->{'matrix'}[$i][$j]
                }
            }
            return $disambiguated_p / scalar @{ $from_array };
        }
        else {
            $self->throw( "Nucleotide \"$from\" and/or \"$to\" not in IUPAC::IUB" );
        }
    }
}

# gets/sets mutation rate.
# example: $model->mu() returns 1;
# example: $model->mu( 0.5 ) doubles diagonal, scales others accordingly
sub mu {
    my ( $self, $mu ) = @_;
    if ( $mu ) {
        $mu = 1 / $mu;
        my $scale = $mu - $self->{'mu'};
        my $mat = $self->{'matrix'};
        for my $i ( 0 .. $#{ $mat } ) {
            for my $j ( 0 .. $#{ $mat->[$i] } ) {
                if ( $i == $j ) {
                    $mat->[$i][$j] = $mat->[$i][$j] + $mat->[$i][$j] * $scale;
                }
                else {
                    $mat->[$i][$j] = $mat->[$i][$j] - ( $mat->[$i][$j] * $scale ) / 3;
                }
            }
        }        
        $self->{'mu'} = $mu;
    }
    return $self->{'mu'};
}

# gets/sets gamma shape parameter alpha
sub gshape {
    my ( $self, $gshape ) = @_;
    $self->{'gshape'} = $gshape if $gshape;
    return $self->{'gshape'};
}

# gets/sets proportion of invariant sites
sub pinvar {
    my ( $self, $pinvar ) = @_;
    $self->{'pinvar'} = $pinvar if $pinvar;
    return $self->{'pinvar'};
}

# other methods: raw transition matrix input, raw frequency input, export to
# and import from MrBayes/Paup/whatever model syntax, also needs pod and tests

1;