[Biojava-dev] First draft of a remote blast service class
Sylvain Foisy
sylvain.foisy at diploide.net
Thu Jun 11 13:52:01 UTC 2009
Hi to all,
I've been working on this for the past week or so and after discussing this
with Andreas, I am putting my code here for critical review. I'll put this
stuff in biojava-live as soon as Andreas can fix my SVN access.
First, an interface called RemotePairwiseAlignementService defines the basic
components of a remote service: sequence/database/program/run options/output
options. RemoteQBlastService implements this interface, runs remote
QBlast requests and creates output in either text, XML or HTML. At present,
the regular blastall programs work; there is no blastpgp/megablast support yet.
I'll need some guidance to make it work on other types of web services like
EBI.
Best regards
Sylvain
===================================================================
Sylvain Foisy, Ph. D.
Consultant Bio-informatique / Bioinformatics
Diploide.net - TI pour la vie / IT for Life
Courriel: sylvain.foisy at diploide.net
Web: http://www.diploide.net
Tel: (514) 893-4363
===================================================================
import java.io.InputStream;
import org.biojava.bio.BioException;
/**
 * Minimal contract needed to run a pairwise alignment on a remote service.
 *
 * <p>
 * Examples of such services: the QBlast service at NCBI, the web services
 * at EBI.
 * </p>
 *
 * @author Sylvain Foisy
 * @since 1.8
 */
public interface RemotePairwiseAlignementService {

    /** Output format selector: the results are produced as text. */
    int TEXT = 0;

    /** Output format selector: the results are produced as XML. */
    int XML = 1;

    /** Output format selector: the results are produced as HTML. */
    int HTML = 2;

    /**
     * Sets the database to search for this pairwise alignment.
     *
     * @param db a <code>String</code> with a database ID valid for the
     *           service used
     */
    void setDatabase(String db);

    /**
     * Sets the sequence to be aligned by this request.
     *
     * @param seq a <code>String</code> with the sequence to be aligned
     */
    void setSequence(String seq);

    /**
     * Sets the program to use for this pairwise alignment.
     *
     * @param prog a <code>String</code> naming the program to run on the
     *             service used
     */
    void setProgram(String prog);

    /**
     * Sets all the other options to use for this pairwise alignment.
     *
     * @param str a <code>String</code> with the additional options, in the
     *            form expected by the service used
     */
    void setAdvancedOptions(String str);

    /**
     * Runs the actual analysis on the instantiated service.
     *
     * @throws BioException if the analysis cannot be executed
     */
    void executeSearch() throws BioException;

    /**
     * Gets the alignment results from this instantiated service.
     *
     * @return an <code>InputStream</code> with the actual alignment results
     * @throws BioException if the results cannot be obtained
     */
    InputStream getAlignmentResults() throws BioException;
}
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import org.biojava.bio.BioException;
/**
* RemoteQBlastService - A simple way of submitting BLAST request to the
QBlast
* service at NCBI.
*
* <p>
* NCBI provides a Blast server through a CGI-BIN interface.
RemoteQBlastService simply
* encapsulates an access to it by giving users access to get/set methods to
fix
* sequence, program and database as well as advanced options.
* </p>
*
* <p>
* As of version 1.0, only blastall programs are usable. blastpgp and
megablast are high-priorities.
* </p>
*
* @author Sylvain Foisy
* @version 1.0
* @since 1.8
*
*
*/
public class RemoteQBlastService implements RemotePairwiseAlignementService{
// public static final int TEXT = 0;
// public static final int XML = 1;
// public static final int HTML = 2;
private static String baseurl =
"http://www.ncbi.nlm.nih.gov/blast/Blast.cgi";
private URL aUrl;
private URLConnection uConn;
private OutputStreamWriter fromQBlast;
private BufferedReader rd;
private String seq = null;
private String prog = null;
private String db = null;
private String outputFormat = null;
private String advanced = null;
private String rid;
private long step;
private boolean done = false;
private long start;
public RemoteQBlastService() throws BioException {
try {
aUrl = new URL(baseurl);
uConn = setQBlastProperties(aUrl.openConnection());
outputFormat = "Text";
}
/*
* Needed but should never be thrown since the URL is static and
known to exist
*/
catch (MalformedURLException e) {
throw new BioException("It looks like the URL for NCBI QBlast
service is bad");
}
/*
* Intercept if the program can't connect to QBlast service
*/
catch (IOException e) {
throw new BioException(
"Impossible to connect to QBlast service at this time.
Check your network connection");
}
}
/**
* This method execute the Blast request via the Put command of the
CGI-BIN
* interface. It gets the estimated time of completion by capturing the
* value of the RTOE variable and sets a loop that will check for
completion
* of analysis at intervals specified by RTOE.
*
* <p>
* It also capture the value for the RID variable, necessary for
fetching
* the actual results after completion.
* </p>
*
* @throws BioException
* if it is not possible to sent the BLAST command
*/
public void executeSearch() throws BioException {
if (seq == null || db == null || prog == null) {
throw new BioException(
"Impossible to execute QBlast request. One or more of
seq|db|prog has not been set");
}
/*
* sending the command to execute the Blast analysis
*/
String cmd = "CMD=Put&SERVICE=plain" + "&" + seq + "&" + prog + "&"
+ db + "&" + "FORMAT_TYPE=HTML";
if (advanced != null) {
cmd += cmd + "&" + advanced;
}
try {
uConn = setQBlastProperties(aUrl.openConnection());
fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
fromQBlast.write(cmd);
fromQBlast.flush();
// Get the response
rd = new BufferedReader(new InputStreamReader(uConn
.getInputStream()));
String line = "";
while ((line = rd.readLine()) != null) {
if (line.contains("RID")) {
String[] arr = line.split("=");
rid = arr[1].trim();
} else if (line.contains("RTOE")) {
String[] arr = line.split("=");
step = Long.parseLong(arr[1].trim()) * 1000;
start = System.currentTimeMillis() + step;
}
}
} catch (IOException e) {
throw new BioException(
"Can't submit sequence to BLAST server at this time.");
}
/*
* Getting the info out of the NCBI system
*/
while (!done) {
long prez = System.currentTimeMillis();
done = isReady(rid, prez);
}
}
/**
* <p>This method is used only for the executeBlastSearch method to
check for completion of
* request using the NCBI specified RTOE variable</p>
*
* @param id
* @param present
* @return
*/
private boolean isReady(String id, long present) {
boolean ready = false;
String check = "CMD=Get&RID=" + id;
/*
* If present time is less than the start of the search added to
step
* obtained from NCBI, just do nothing ;-)
*/
if (present < start) {
;
}
/*
* If we are at least step seconds in the future from the actual
call of
* method executeBlastSearch()
*/
else {
try {
uConn = setQBlastProperties(aUrl.openConnection());
fromQBlast = new
OutputStreamWriter(uConn.getOutputStream());
fromQBlast.write(check);
fromQBlast.flush();
rd = new BufferedReader(new InputStreamReader(uConn
.getInputStream()));
String line = "";
while ((line = rd.readLine()) != null) {
if (line.contains("READY")) {
ready = true;
} else if (line.contains("WAITING")) {
/*
* Else, move start forward in time...
*/
start = present + step;
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
return ready;
}
/**
* <p>This method extracts this actual Blast report. The default format
is Text but can be changed before with the method
* setQBlastOutputFormat.</p>
*
*
* @return
* @throws BioException
*/
public InputStream getAlignmentResults() throws BioException {
String srid = "CMD=Get&RID=" + rid;
srid += "&FORMAT_TYPE=" + outputFormat;
if(!this.done){
throw new BioException("Unable to get report at this time. Your
Blast request has not been processed yet.");
}
try {
uConn = setQBlastProperties(aUrl.openConnection());
fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
fromQBlast.write(srid);
fromQBlast.flush();
return uConn.getInputStream();
} catch (IOException ioe) {
throw new BioException(
"It is not possible to fetch Blast report from NCBI at
this time");
}
}
/**
* <p>
* Set the sequence to be blasted using the String that correspond to
the
* sequence.
* </p>
*
* <p>
* Take note that this method is mutually exclusive to setGIToBlast()
for a
* given Blast request.
* </p>
*
* @param aStr
* : a String with the sequence
*/
public void setSequence(String aStr) {
this.seq = "QUERY=" + aStr;
}
/**
* Simply return a string with the blasted sequence.
*
* @return seq : a string with the sequence
*/
public String getSeqToBlast() {
return this.seq;
}
/**
* <p>
* Set the sequence to be blasted using the NCBI GI value. At this time,
* there is no effort made to check the validity of this GI.
* </p>
*
* <p>
* Take note that this method is mutually exclusive to setSeqToBlast()
for a
* given Blast request.
* </p>
*
* @param gi
* : an integer value representing a NCBI GI
*/
public void setGIToBlast(String gi) {
this.seq = "QUERY=" + gi;
}
/**
* <p>
* Simply return a string with the sequence blasted.
* </p>
*
* @return GI : a String with the GI of the blasted sequence
*/
public String getGIToBlast() {
return this.seq;
}
/**
* <p>
* This method set the program to be used to blast the given
sequence/GI. At
* this time, there is no attempt at checking the matching of sequence
type
* to program.
* </p>
*
* @param prog
* : a String representing the program specified for this
QBlast
* request.
*
*/
public void setProgram(String prog) {
this.prog = "PROGRAM=" + prog;
}
/**
* <p>
* Simply returns the program used for the given Blast request.
* </p>
*
* @return prog : a String with the program used for this QBlast
request.
*/
public String getProgram() {
return this.prog;
}
/**
* <p>
* This method set the database to be used to blast the given
sequence/GI.
* At this time, there is no attempt at checking the matching of
sequence
* type to database.
* </p>
*
* @param db: a String for the database specified for this QBlast
request
*/
public void setDatabase(String db) {
this.db = "DATABASE=" + db;
}
/**
* <p>
* Simply returns the database used for the given Blast request.
* </p>
*
* @return db: a String with the database used for this QBlast request.
*/
public String getBlastDatabase() {
return this.db;
}
/**
* <p>This method let the user specify which format to use for
generating the output.</p>
*
* @param type:an integer taken from the static constant of this class,
either be TEXT, XML or HTML
*/
public void setQBlastOutputFormat(int type) {
switch (type) {
case 0:
this.outputFormat = "Text";
break;
case 1:
this.outputFormat = "XML";
break;
case 2:
this.outputFormat = "HTML";
break;
}
}
/**
* <p>
* Simply returns the output format used for the given Blast report.
* </p>
*
* @return outputFormat : a String with the format specified for the
QBlast report.
*/
public String getQBlastOutputFormat() {
return this.outputFormat;
}
/**
* <p>This method is to be used if a request is to use non-default
values at submission. According to QBlast info,
* the accepted parameters for PUT requests are:</p>
*
* <ul>
* <li>-G: cost to create a gap. Default = 5 (nuc-nuc) / 11 (protein) /
non-affine for megablast</li>
* <li>-E: Cost to extend a gap. Default = 2 (nuc-nuc) / 1 (protein) /
non-affine for megablast</li>
* <li>-r: integer to reward for match. Default = 1</li>
* <li>-q: negative integer for penalty to allow mismatch. Default =
-3</li>
* <li>-e: expectation value. Default = 10.0</li>
* <li>-W: word size. Default = 3 (proteins) / 11 (nuc-nuc) / 28
(megablast)</li>
* <li>-y: dropoff for blast extensions in bits, using default if not
specified. Default = 20 for blastn, 7 for all others
* (except megablast for which it is not applicable).</li>
* <li>-X: X dropoff value for gapped alignment, in bits. Default = 30
for blastn/megablast, 15 for all others.</li>
* <li>-Z: final X dropoff value for gapped alignement, in bits. Default
= 50 for blastn, 25 for all others
* (except megablast for which it is not applicable)</li>
* <li>-P: equals 0 for multiple hits 1-pass, 1 for single hit 1-pass.
Does not apply to blastn ou megablast.</li>
* <li>-A: multiple hits window size. Default = 0 (for single hit
algorithm)</li>
* <li>-I: number of database sequences to save hits for. Default =
500</li>
* <li>-Y: effective length of the search space. Default = 0 (0
represents using the whole space)</li>
* <li>-z: a real specifying the effective length of the database to
use. Default = 0 (0 represents the real size)</li>
* <li>-c: an integer representing pseudocount constant for PSI-BLAST.
Default = 7</li>
* <li>-F: any filtering directive</li>
* </ul>
*
* <p>You have to be aware that at not moment is there any error
checking on the use of these parameters by this class.</p>
* @param aStr: a String with any number of optional parameters with an
associated value.
*
*/
public void setAdvancedOptions(String aStr) {
this.advanced = "OTHER_ADVANCED=" + aStr;
}
/**
*
* Simply return the string given as argument via
setBlastAdvancedOptions
*
* @return advanced: the string with the advanced options
*/
public String getBlastAdvancedOptions() {
return this.advanced;
}
/**
*
* Simply return the QBlast RID for this specific QBlast request
*
* @return rid: the string with the RID
*/
public String getBlastRID() {
return this.rid;
}
/**
* A simple method to check the availability of the QBlast service
*
* @throws BioException
*/
public void printRemoteBlastInfo() throws BioException {
try {
OutputStreamWriter out = new OutputStreamWriter(uConn
.getOutputStream());
out.write("CMD=Info");
out.flush();
// Get the response
BufferedReader rd = new BufferedReader(new
InputStreamReader(uConn
.getInputStream()));
String line = "";
while ((line = rd.readLine()) != null) {
System.out.println(line);
}
out.close();
rd.close();
} catch (IOException e) {
throw new BioException(
"Impossible to get info from QBlast service at this
time. Check your network connection");
}
}
private URLConnection setQBlastProperties(URLConnection conn) {
URLConnection tmp = conn;
conn.setDoOutput(true);
conn.setUseCaches(false);
tmp.setRequestProperty("User-Agent", "Biojava/RemoteQBlastService");
tmp.setRequestProperty("Connection", "Keep-Alive");
tmp.setRequestProperty("Content-type",
"application/x-www-form-urlencoded");
tmp.setRequestProperty("Content-length", "200");
return tmp;
}
}
More information about the biojava-dev
mailing list