[Biojava-dev] fixed and streamlined SmithWaterman.java

Fred Long FLong at skcc.org
Tue May 8 07:23:00 UTC 2007


I found that SmithWaterman was calling super() with the wrong
parameters, which caused getMatch() to return the wrong value.  While
fixing that I removed a lot of the redundancy from SmithWaterman.  Here
is the patch file is anybody is interested.  Disclaimer: I'm not a
BioJava developer.

FL

------------------------------------



*** old/NeedlemanWunsch.java	2007-05-08 00:05:20.000000000 -0700
--- NeedlemanWunsch.java	2007-05-08 00:05:38.000000000 -0700
***************
*** 63,69 ****
    protected SubstitutionMatrix subMatrix;
    protected Alignment pairalign;
    protected String alignment;
!   private double insert, delete, gapExt, match, replace;
    
    
    /** Constructs a new Object with the given parameters based on the
Needleman-Wunsch algorithm
--- 63,69 ----
    protected SubstitutionMatrix subMatrix;
    protected Alignment pairalign;
    protected String alignment;
!   protected double insert, delete, gapExt, match, replace;
    
    
    /** Constructs a new Object with the given parameters based on the
Needleman-Wunsch algorithm
***************
*** 517,523 ****
     *        target sequence
     * @return The score for the given substitution.
     */
!   private double matchReplace(Sequence query, Sequence subject, int i,
int j) {
      try {
        return subMatrix.getValueAt(query.symbolAt(i),
subject.symbolAt(j));
      } catch (Exception exc) {
--- 517,523 ----
     *        target sequence
     * @return The score for the given substitution.
     */
!   protected double matchReplace(Sequence query, Sequence subject, int
i, int j) {
      try {
        return subMatrix.getValueAt(query.symbolAt(i),
subject.symbolAt(j));
      } catch (Exception exc) {
*** old/SmithWaterman.java	2007-05-08 00:05:20.000000000 -0700
--- SmithWaterman.java	2007-05-08 00:05:38.000000000 -0700
***************
*** 53,141 ****
  public class SmithWaterman extends NeedlemanWunsch
  {
  
-   private double match, replace, insert, delete, gapExt;
    private double[][] scoreMatrix;  
    
  
    /** Constructs the new SmithWaterman alignment object. Alignments
are only performed,
      * if the alphabet of the given <code>SubstitutionMatrix</code>
equals the alpabet of
!     * both the query and the target <code>Sequence</code>. The
alignment parameters here
!     * are expenses and not scores as they are in the
<code>NeedlemanWunsch</code> object.
!     * scores are just given by multipliing the expenses with
<code>(-1)</code>. For example
!     * you could use parameters like "-2, 5, 3, 3, 0". If the expenses
for gap extension
      * are equal to the cost of starting a gap (delete or insert), no
affine gap penalties
      * are used, which saves memory.
      *
!     * @param match expenses for a match
!     * @param replace expenses for a replace operation
!     * @param insert expenses for a gap opening in the query sequence
!     * @param delete expenses for a gap opening in the target sequence
!     * @param gapExtend expenses for the extension of a gap which was
started earlier.
      * @param matrix the <code>SubstitutionMatrix</code> object to use.
      */
    public SmithWaterman(double match, double replace, double insert,
double delete, double gapExtend, SubstitutionMatrix matrix)
    {
!     super(insert, delete, gapExtend, match, replace, matrix);
!     this.match      = -match;
!     this.replace    = -replace;
!     this.insert     = -insert;
!     this.delete     = -delete;
!     this.gapExt     = -gapExtend;
!     this.subMatrix  = matrix;
!     this.alignment  = "";
    }
    
-   /** Overrides the method inherited from the NeedlemanWunsch and 
-     * sets the penalty for an insert operation to the specified value.
-     * Reason: internaly scores are used instead of penalties so that 
-     * the value is muliplied with -1.
-     * @param ins costs for a single insert operation
-     */
-   public void setInsert(double ins) {
-     this.insert = -ins;
-   }
-   
-   /** Overrides the method inherited from the NeedlemanWunsch and 
-     * sets the penalty for a delete operation to the specified value. 
-     * Reason: internaly scores are used instead of penalties so that 
-     * the value is muliplied with -1.
-     * @param del costs for a single deletion operation
-     */
-   public void setDelete(double del) {
-     this.delete = -del;
-   }
-   
-   /** Overrides the method inherited from the NeedlemanWunsch and 
-     * sets the penalty for an extension of any gap (insert or delete)
to the 
-     * specified value.
-     * Reason: internaly scores are used instead of penalties so that 
-     * the value is muliplied with -1. 
-     * @param ge costs for any gap extension
-     */
-   public void setGapExt(double ge) {
-     this.gapExt = -ge;
-   }
-   
-   /** Overrides the method inherited from the NeedlemanWunsch and 
-     * sets the penalty for a match operation to the specified value. 
-     * Reason: internaly scores are used instead of penalties so that 
-     * the value is muliplied with -1.
-     * @param ma costs for a single match operation
-     */
-   public void setMatch(double ma) {
-     this.match = -ma;
-   }
-   
-   /** Overrides the method inherited from the NeedlemanWunsch and 
-     * sets the penalty for a replace operation to the specified value.

-     * Reason: internaly scores are used instead of penalties so that 
-     * the value is muliplied with -1.
-     * @param rep costs for a single replace operation
-     */
-   public void setReplace(double rep) {
-     this.replace = -rep;
-   }  
-   
  
    /** Overrides the method inherited from the NeedlemanWunsch and
performs only a local alignment.
      * It finds only the longest common subsequence. This is good for
the beginning, but it might
--- 53,79 ----
  public class SmithWaterman extends NeedlemanWunsch
  {
  
    private double[][] scoreMatrix;  
    
  
    /** Constructs the new SmithWaterman alignment object. Alignments
are only performed,
      * if the alphabet of the given <code>SubstitutionMatrix</code>
equals the alpabet of
!     * both the query and the target <code>Sequence</code>. If the
expenses for gap extension
      * are equal to the cost of starting a gap (delete or insert), no
affine gap penalties
      * are used, which saves memory.
      *
!     * @param match expenses for a match (usually < 0)
!     * @param replace expenses for a replace operation (usually > 0)
!     * @param insert expenses for a gap opening in the query sequence
(usually > 0)
!     * @param delete expenses for a gap opening in the target sequence
(usually > 0)
!     * @param gapExtend expenses for the extension of a gap which was
started earlier (usually >= 0)
      * @param matrix the <code>SubstitutionMatrix</code> object to use.
      */
    public SmithWaterman(double match, double replace, double insert,
double delete, double gapExtend, SubstitutionMatrix matrix)
    {
!     super(match, replace, insert, delete, gapExtend, matrix);
    }
    
  
    /** Overrides the method inherited from the NeedlemanWunsch and
performs only a local alignment.
      * It finds only the longest common subsequence. This is good for
the beginning, but it might
***************
*** 186,193 ****
          for (i=1; i<=query.length();   i++)
            for (j=1; j<=subject.length(); j++)
            {
!             E[i][j] = Math.max(E[i][j-1], scoreMatrix[i][j-1] +
insert) + gapExt;
!             F[i][j] = Math.max(F[i-1][j], scoreMatrix[i-1][j] +
delete) + gapExt;
              scoreMatrix[i][j] = max(0.0, E[i][j], F[i][j],
scoreMatrix[i-1][j-1] + matchReplace(query, subject, i, j));
              
              if (scoreMatrix[i][j] > scoreMatrix[maxI][maxJ]) {
--- 124,131 ----
          for (i=1; i<=query.length();   i++)
            for (j=1; j<=subject.length(); j++)
            {
!             E[i][j] = Math.max(E[i][j-1], scoreMatrix[i][j-1] -
insert) - gapExt;
!             F[i][j] = Math.max(F[i-1][j], scoreMatrix[i-1][j] -
delete) - gapExt;
              scoreMatrix[i][j] = max(0.0, E[i][j], F[i][j],
scoreMatrix[i-1][j-1] + matchReplace(query, subject, i, j));
              
              if (scoreMatrix[i][j] > scoreMatrix[maxI][maxJ]) {
***************
*** 223,229 ****
                // Insert || finish gap if extended gap is opened
                } else if (scoreMatrix[i][j] == E[i][j] ||
gap_extend[0]) {
                  //check if gap has been extended or freshly opened
!                 gap_extend[0] = (E[i][j] != scoreMatrix[i][j-1] +
insert + gapExt);
                  
                  align[0] = '-' + align[0];
                  align[1] = st.tokenizeSymbol(subject.symbolAt(j--)) +
align[1];
--- 161,167 ----
                // Insert || finish gap if extended gap is opened
                } else if (scoreMatrix[i][j] == E[i][j] ||
gap_extend[0]) {
                  //check if gap has been extended or freshly opened
!                 gap_extend[0] = (E[i][j] != scoreMatrix[i][j-1] -
insert - gapExt);
                  
                  align[0] = '-' + align[0];
                  align[1] = st.tokenizeSymbol(subject.symbolAt(j--)) +
align[1];
***************
*** 232,238 ****
                // Delete || finish gap if extended gap is opened
                } else {
                  //check if gap has been extended or freshly opened
!                 gap_extend[1] = (F[i][j] != scoreMatrix[i-1][j] +
delete + gapExt);
                  
                  align[0] = st.tokenizeSymbol(query.symbolAt(i--)) +
align[0];
                  align[1] = '-' + align[1];
--- 170,176 ----
                // Delete || finish gap if extended gap is opened
                } else {
                  //check if gap has been extended or freshly opened
!                 gap_extend[1] = (F[i][j] != scoreMatrix[i-1][j] -
delete - gapExt);
                  
                  align[0] = st.tokenizeSymbol(query.symbolAt(i--)) +
align[0];
                  align[1] = '-' + align[1];
***************
*** 257,264 ****
              
              scoreMatrix[i][j] = max(
                0.0,
!               scoreMatrix[i-1][j]   + delete,
!               scoreMatrix[i][j-1]   + insert,
                scoreMatrix[i-1][j-1] + matchReplace(query, subject, i,
j)
              );
                          
--- 195,202 ----
              
              scoreMatrix[i][j] = max(
                0.0,
!               scoreMatrix[i-1][j]   - delete,
!               scoreMatrix[i][j-1]   - insert,
                scoreMatrix[i-1][j-1] + matchReplace(query, subject, i,
j)
              );
                          
***************
*** 291,297 ****
                    align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
                    
                  // Insert
!                 } else if (scoreMatrix[i][j] == scoreMatrix[i][j-1] +
insert) {
                    align[0] = '-' + align[0];
                    align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
                    path     = ' ' + path;
--- 229,235 ----
                    align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
                    
                  // Insert
!                 } else if (scoreMatrix[i][j] == scoreMatrix[i][j-1] -
insert) {
                    align[0] = '-' + align[0];
                    align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
                    path     = ' ' + path;
***************
*** 382,409 ****
      if ((y > z)) return y;
      return z;
    }
-   
-   /** This method computes the scores for the substution of the i-th
symbol
-    * of query by the j-th symbol of subject. 
-    * 
-    * @param query The query sequence
-    * @param subject The target sequence
-    * @param i The position of the symbol under consideration within
the 
-    *        query sequence (starting from one)
-    * @param j The position of the symbol under consideration within
the
-    *        target sequence
-    * @return The score for the given substitution.
-    */
-   private double matchReplace(Sequence query, Sequence subject, int i,
int j) {
-     try {
-       return subMatrix.getValueAt(query.symbolAt(i),
subject.symbolAt(j));
-     } catch (Exception exc) {
-       if (query.symbolAt(i).getMatches().contains(subject.symbolAt(j))
||
-         subject.symbolAt(j).getMatches().contains(query.symbolAt(i)))
-         return match;
-       return replace;
-     }
-   }
-   
-   
  }
--- 320,323 ----




More information about the biojava-dev mailing list