[Biojava-dev] fixed and streamlined SmithWaterman.java
mark.schreiber at novartis.com
mark.schreiber at novartis.com
Fri Jun 1 01:32:32 UTC 2007
Hi -
Could the person who maintains the SW code (Andreas Drager??) take a look
at this and check it in if required?
Thanks.
- Mark
"Fred Long" <FLong at skcc.org>
Sent by: biojava-dev-bounces at lists.open-bio.org
05/08/2007 03:15 PM
To: <biojava-dev at biojava.org>
cc: (bcc: Mark Schreiber/GP/Novartis)
Subject: [Biojava-dev] fixed and streamlined SmithWaterman.java
I found that SmithWaterman was calling super() with the wrong
parameters, which caused getMatch() to return the wrong value. While
fixing that I removed a lot of the redundancy from SmithWaterman. Here
is the patch file is anybody is interested. Disclaimer: I'm not a
BioJava developer.
FL
------------------------------------
*** old/NeedlemanWunsch.java 2007-05-08 00:05:20.000000000
-0700
--- NeedlemanWunsch.java 2007-05-08 00:05:38.000000000
-0700
***************
*** 63,69 ****
protected SubstitutionMatrix subMatrix;
protected Alignment pairalign;
protected String alignment;
! private double insert, delete, gapExt, match, replace;
/** Constructs a new Object with the given parameters based on the
Needleman-Wunsch algorithm
--- 63,69 ----
protected SubstitutionMatrix subMatrix;
protected Alignment pairalign;
protected String alignment;
! protected double insert, delete, gapExt, match, replace;
/** Constructs a new Object with the given parameters based on the
Needleman-Wunsch algorithm
***************
*** 517,523 ****
* target sequence
* @return The score for the given substitution.
*/
! private double matchReplace(Sequence query, Sequence subject, int i,
int j) {
try {
return subMatrix.getValueAt(query.symbolAt(i),
subject.symbolAt(j));
} catch (Exception exc) {
--- 517,523 ----
* target sequence
* @return The score for the given substitution.
*/
! protected double matchReplace(Sequence query, Sequence subject, int
i, int j) {
try {
return subMatrix.getValueAt(query.symbolAt(i),
subject.symbolAt(j));
} catch (Exception exc) {
*** old/SmithWaterman.java 2007-05-08 00:05:20.000000000
-0700
--- SmithWaterman.java 2007-05-08 00:05:38.000000000 -0700
***************
*** 53,141 ****
public class SmithWaterman extends NeedlemanWunsch
{
- private double match, replace, insert, delete, gapExt;
private double[][] scoreMatrix;
/** Constructs the new SmithWaterman alignment object. Alignments
are only performed,
* if the alphabet of the given <code>SubstitutionMatrix</code>
equals the alpabet of
! * both the query and the target <code>Sequence</code>. The
alignment parameters here
! * are expenses and not scores as they are in the
<code>NeedlemanWunsch</code> object.
! * scores are just given by multipliing the expenses with
<code>(-1)</code>. For example
! * you could use parameters like "-2, 5, 3, 3, 0". If the expenses
for gap extension
* are equal to the cost of starting a gap (delete or insert), no
affine gap penalties
* are used, which saves memory.
*
! * @param match expenses for a match
! * @param replace expenses for a replace operation
! * @param insert expenses for a gap opening in the query sequence
! * @param delete expenses for a gap opening in the target sequence
! * @param gapExtend expenses for the extension of a gap which was
started earlier.
* @param matrix the <code>SubstitutionMatrix</code> object to use.
*/
public SmithWaterman(double match, double replace, double insert,
double delete, double gapExtend, SubstitutionMatrix matrix)
{
! super(insert, delete, gapExtend, match, replace, matrix);
! this.match = -match;
! this.replace = -replace;
! this.insert = -insert;
! this.delete = -delete;
! this.gapExt = -gapExtend;
! this.subMatrix = matrix;
! this.alignment = "";
}
- /** Overrides the method inherited from the NeedlemanWunsch and
- * sets the penalty for an insert operation to the specified value.
- * Reason: internaly scores are used instead of penalties so that
- * the value is muliplied with -1.
- * @param ins costs for a single insert operation
- */
- public void setInsert(double ins) {
- this.insert = -ins;
- }
-
- /** Overrides the method inherited from the NeedlemanWunsch and
- * sets the penalty for a delete operation to the specified value.
- * Reason: internaly scores are used instead of penalties so that
- * the value is muliplied with -1.
- * @param del costs for a single deletion operation
- */
- public void setDelete(double del) {
- this.delete = -del;
- }
-
- /** Overrides the method inherited from the NeedlemanWunsch and
- * sets the penalty for an extension of any gap (insert or delete)
to the
- * specified value.
- * Reason: internaly scores are used instead of penalties so that
- * the value is muliplied with -1.
- * @param ge costs for any gap extension
- */
- public void setGapExt(double ge) {
- this.gapExt = -ge;
- }
-
- /** Overrides the method inherited from the NeedlemanWunsch and
- * sets the penalty for a match operation to the specified value.
- * Reason: internaly scores are used instead of penalties so that
- * the value is muliplied with -1.
- * @param ma costs for a single match operation
- */
- public void setMatch(double ma) {
- this.match = -ma;
- }
-
- /** Overrides the method inherited from the NeedlemanWunsch and
- * sets the penalty for a replace operation to the specified value.
- * Reason: internaly scores are used instead of penalties so that
- * the value is muliplied with -1.
- * @param rep costs for a single replace operation
- */
- public void setReplace(double rep) {
- this.replace = -rep;
- }
-
/** Overrides the method inherited from the NeedlemanWunsch and
performs only a local alignment.
* It finds only the longest common subsequence. This is good for
the beginning, but it might
--- 53,79 ----
public class SmithWaterman extends NeedlemanWunsch
{
private double[][] scoreMatrix;
/** Constructs the new SmithWaterman alignment object. Alignments
are only performed,
* if the alphabet of the given <code>SubstitutionMatrix</code>
equals the alpabet of
! * both the query and the target <code>Sequence</code>. If the
expenses for gap extension
* are equal to the cost of starting a gap (delete or insert), no
affine gap penalties
* are used, which saves memory.
*
! * @param match expenses for a match (usually < 0)
! * @param replace expenses for a replace operation (usually > 0)
! * @param insert expenses for a gap opening in the query sequence
(usually > 0)
! * @param delete expenses for a gap opening in the target sequence
(usually > 0)
! * @param gapExtend expenses for the extension of a gap which was
started earlier (usually >= 0)
* @param matrix the <code>SubstitutionMatrix</code> object to use.
*/
public SmithWaterman(double match, double replace, double insert,
double delete, double gapExtend, SubstitutionMatrix matrix)
{
! super(match, replace, insert, delete, gapExtend, matrix);
}
/** Overrides the method inherited from the NeedlemanWunsch and
performs only a local alignment.
* It finds only the longest common subsequence. This is good for
the beginning, but it might
***************
*** 186,193 ****
for (i=1; i<=query.length(); i++)
for (j=1; j<=subject.length(); j++)
{
! E[i][j] = Math.max(E[i][j-1], scoreMatrix[i][j-1] +
insert) + gapExt;
! F[i][j] = Math.max(F[i-1][j], scoreMatrix[i-1][j] +
delete) + gapExt;
scoreMatrix[i][j] = max(0.0, E[i][j], F[i][j],
scoreMatrix[i-1][j-1] + matchReplace(query, subject, i, j));
if (scoreMatrix[i][j] > scoreMatrix[maxI][maxJ]) {
--- 124,131 ----
for (i=1; i<=query.length(); i++)
for (j=1; j<=subject.length(); j++)
{
! E[i][j] = Math.max(E[i][j-1], scoreMatrix[i][j-1] -
insert) - gapExt;
! F[i][j] = Math.max(F[i-1][j], scoreMatrix[i-1][j] -
delete) - gapExt;
scoreMatrix[i][j] = max(0.0, E[i][j], F[i][j],
scoreMatrix[i-1][j-1] + matchReplace(query, subject, i, j));
if (scoreMatrix[i][j] > scoreMatrix[maxI][maxJ]) {
***************
*** 223,229 ****
// Insert || finish gap if extended gap is opened
} else if (scoreMatrix[i][j] == E[i][j] ||
gap_extend[0]) {
//check if gap has been extended or freshly opened
! gap_extend[0] = (E[i][j] != scoreMatrix[i][j-1] +
insert + gapExt);
align[0] = '-' + align[0];
align[1] = st.tokenizeSymbol(subject.symbolAt(j--)) +
align[1];
--- 161,167 ----
// Insert || finish gap if extended gap is opened
} else if (scoreMatrix[i][j] == E[i][j] ||
gap_extend[0]) {
//check if gap has been extended or freshly opened
! gap_extend[0] = (E[i][j] != scoreMatrix[i][j-1] -
insert - gapExt);
align[0] = '-' + align[0];
align[1] = st.tokenizeSymbol(subject.symbolAt(j--)) +
align[1];
***************
*** 232,238 ****
// Delete || finish gap if extended gap is opened
} else {
//check if gap has been extended or freshly opened
! gap_extend[1] = (F[i][j] != scoreMatrix[i-1][j] +
delete + gapExt);
align[0] = st.tokenizeSymbol(query.symbolAt(i--)) +
align[0];
align[1] = '-' + align[1];
--- 170,176 ----
// Delete || finish gap if extended gap is opened
} else {
//check if gap has been extended or freshly opened
! gap_extend[1] = (F[i][j] != scoreMatrix[i-1][j] -
delete - gapExt);
align[0] = st.tokenizeSymbol(query.symbolAt(i--)) +
align[0];
align[1] = '-' + align[1];
***************
*** 257,264 ****
scoreMatrix[i][j] = max(
0.0,
! scoreMatrix[i-1][j] + delete,
! scoreMatrix[i][j-1] + insert,
scoreMatrix[i-1][j-1] + matchReplace(query, subject, i,
j)
);
--- 195,202 ----
scoreMatrix[i][j] = max(
0.0,
! scoreMatrix[i-1][j] - delete,
! scoreMatrix[i][j-1] - insert,
scoreMatrix[i-1][j-1] + matchReplace(query, subject, i,
j)
);
***************
*** 291,297 ****
align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
// Insert
! } else if (scoreMatrix[i][j] == scoreMatrix[i][j-1] +
insert) {
align[0] = '-' + align[0];
align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
path = ' ' + path;
--- 229,235 ----
align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
// Insert
! } else if (scoreMatrix[i][j] == scoreMatrix[i][j-1] -
insert) {
align[0] = '-' + align[0];
align[1] = st.tokenizeSymbol(subject.symbolAt(j--))
+ align[1];
path = ' ' + path;
***************
*** 382,409 ****
if ((y > z)) return y;
return z;
}
-
- /** This method computes the scores for the substution of the i-th
symbol
- * of query by the j-th symbol of subject.
- *
- * @param query The query sequence
- * @param subject The target sequence
- * @param i The position of the symbol under consideration within
the
- * query sequence (starting from one)
- * @param j The position of the symbol under consideration within
the
- * target sequence
- * @return The score for the given substitution.
- */
- private double matchReplace(Sequence query, Sequence subject, int i,
int j) {
- try {
- return subMatrix.getValueAt(query.symbolAt(i),
subject.symbolAt(j));
- } catch (Exception exc) {
- if (query.symbolAt(i).getMatches().contains(subject.symbolAt(j))
||
- subject.symbolAt(j).getMatches().contains(query.symbolAt(i)))
- return match;
- return replace;
- }
- }
-
-
}
--- 320,323 ----
_______________________________________________
biojava-dev mailing list
biojava-dev at lists.open-bio.org
http://lists.open-bio.org/mailman/listinfo/biojava-dev
More information about the biojava-dev
mailing list