[Biojava-l] Reading frames and amino acids
mark.schreiber at group.novartis.com
mark.schreiber at group.novartis.com
Sun Mar 7 20:00:34 EST 2004
Thanks -
Give me a few days and I'll put this up. If I don't, please nag me :)
- Mark
facemann <off2w0rk at yahoo.com>
Sent by: biojava-l-bounces at portal.open-bio.org
03/06/2004 11:09 AM
To: biojava-l at biojava.org
cc:
Subject: [Biojava-l] Reading frames and amino acids
Here is a small contribution. I use it to find simple motifs. Feel free
to edit or scrap.
/**
*MotifLister.java
*Andy Hammer
*08 Aug 2003
*Lists all instances of a motif in the specified (dna/rna/protein) fasta file.
*The motif can contain Ambiguity symbols
*Lists the ORF title and position of motif
*Outputs a list of counts to stdout.
*/
import java.io.*;
import java.util.*;
import java.util.regex.*;
import org.biojava.bio.*;
import org.biojava.bio.seq.*;
import org.biojava.bio.seq.io.*;
import org.biojava.bio.symbol.*;
public class MotifLister{
public MotifLister(String type, String inputFile, String target, String
placement)throws Exception{
System.out.println("MotifLister is searching file " + inputFile + "
for the motif '" + target +
"' in frame " + placement + ".");
try{
if(type.equalsIgnoreCase("dna")){
motif = DNATools.createDNA(target);
}else if(type.equalsIgnoreCase("rna")){
motif = RNATools.createRNA(target);
}else{
motif = ProteinTools.createProtein(target);
}
}
catch(BioError e){
System.out.println("Error!! Data type must match type of motif.");
System.out.println("Specifically, " + target + " is not " + type);
System.exit(0);
}
Pattern p = Pattern.compile( MotifTools.createRegex(motif) );
frame = Integer.parseInt(placement);
if(frame < 0 || frame > 3){
System.out.println("Only frames 0 through 3 are alloweds");
System.out.println("frame zero searches all frames.");
System.exit(0);
}
count = 0;
//read the file
//input
FileInputStream fis = new FileInputStream(inputFile);
InputStreamReader isr = new InputStreamReader(fis);
BufferedReader input = new BufferedReader(isr);
try{
if(type.equalsIgnoreCase("dna")){
si = SeqIOTools.readFastaDNA(input);
}else if(type.equalsIgnoreCase("rna")){
si = SeqIOTools.readFastaRNA(input);
}else{
si = SeqIOTools.readFastaProtein(input);
}
while (si.hasNext()){
Sequence seq = si.nextSequence();
Matcher matcher = p.matcher(seq.seqString());
int start = 0;
while(matcher.find(start)) {
start = matcher.start();
int end = matcher.end();
int result = (start % 3) + 1;
if(result == frame || frame == 0){
System.out.println(seq.getName() + " : " + "[" + (start + 1) +
"," + (end) + "]");
count++;
}
start++;
}
}
input.close(); //close the file
System.out.println("Total Hits = " + count);
}
catch(BioException e){
System.out.println(inputFile + " is not a " + type + " file.");
System.out.println(e);
}
}
public static void main(String[] args)throws Exception{
if (args.length < 4) {
System.err.println(" Usage: >java -jar MotifLister.jar type
fastaFile motif frame" +
"\n Ex: >java -jar MotifLister.jar dna
eColi.fasta AAAAAAG 3 > output.txt" +
"\n would search for A AAA AAG in the third frame
in dna file eColi.fasta" +
"\n and print the results to file output.txt." +
"\n 'type' can be dna, rna, or protein." +
"\n 'frame' can be integers 0 through 3." +
"\n 0 counts any instance of the motif." +
"\n 1, 2, 3 counts only instances of the motif in
the specified frame." +
"\n Capture output with redirection operator
'>'.");
}else{
MotifLister ML = new MotifLister(args[0], args[1], args[2],
args[3]);
}
}
private SymbolList motif;
private int frame;
private int count;
private SequenceIterator si;
}
---------------------------------
Do you Yahoo!?
Yahoo! Search - Find what you're looking for
faster.
_______________________________________________
Biojava-l mailing list - Biojava-l at biojava.org
http://biojava.org/mailman/listinfo/biojava-l
More information about the Biojava-l
mailing list