[Biojava-dev] Script for cookbook

Richard Holland holland at eaglegenomics.com
Wed Jan 13 13:22:16 UTC 2010


Just a few minor points, otherwise all good:

 1. Your 'trying EMBL' code just tries Genbank again!
 2. Typo in the FileNotFoundException message.
 3. Check to see that your ComparableTerm retrieval statements using RichObjectFactory are not already defined as constants in RichSequence.Terms or GenbankFormat.Terms. If they are, use the constants instead as this makes the code clearer.
 4. Your system.out line that prints the results has Chr1 hardcoded - should this be a parameter, or read from the file maybe?

cheers,
Richard

On 13 Jan 2010, at 12:49, Jolyon Holdstock wrote:

> Hi,
> 
> 
> 
> I was going to add the following script to the annotation section of the
> cookbook. 
> 
> It takes an EMBL or Genbank file and outputs information about each CDS
> feature.
> 
> 
> 
> I just wanted to check this was OK and also is this the most efficient
> way of doing this?
> 
> 
> 
> Cheers,
> 
> 
> 
> Jolyon
> 
> 
> 
> [CODE]
> 
> import java.io.*;
> 
> import java.util.*;
> 
> import org.biojava.bio.*;
> 
> import org.biojava.bio.seq.*;
> 
> import org.biojava.bio.seq.io.*;
> 
> import org.biojavax.*;
> 
> import org.biojavax.ontology.*;
> 
> import org.biojavax.bio.*;
> 
> import org.biojavax.bio.seq.*;
> 
> 
> 
> public class ExtractInformation {
> 
>  //Create the RichSequence object
> 
>  RichSequence richSeq;
> 
>  public ExtractInformation(String fileName){
> 
>    //Load the sequence file
> 
>    try {
> 
>      richSeq = RichSequence.IOTools.readGenbankDNA(new
> BufferedReader(new FileReader(fileName)),null).nextRichSequence();
> 
>    }
> 
>    catch(FileNotFoundException fnfe){
> 
>      System.out.println("FilwNotFoundException: " + fnfe);
> 
>    }
> 
>    catch(BioException bioe1){
> 
>      System.err.println("Not a Genbank sequence trying EMBL");
> 
>      try  {
> 
>        richSeq = RichSequence.IOTools.readGenbankDNA(new
> BufferedReader(new FileReader(fileName)),null).nextRichSequence();
> 
>      }
> 
>      catch(BioException bioe2){
> 
>        System.err.println("Not an EMBL sequence either");
> 
>        System.exit(1);
> 
>      }
> 
>      catch(FileNotFoundException fnfe){
> 
>        System.out.println("FilwNotFoundException: " + fnfe);
> 
>      }
> 
>    }
> 
>    //Filter the sequence on CDS features
> 
>    FeatureFilter ff = new FeatureFilter.ByType("CDS");
> 
>    FeatureHolder fh = richSeq.filter(ff);
> 
> 
> 
>    //Iterate through the CDS features
> 
>    for (Iterator <RichFeature> i = fh.features(); i.hasNext();){
> 
>      RichFeature rf = i.next();
> 
> 
> 
>      //Get the strand orientation of the feature
> 
>      char featureStrand = rf.getStrand().getToken();
> 
> 
> 
>      //Get the location of the feature
> 
>      String featureLocation = rf.getLocation().toString();
> 
> 
> 
>      //Get the annotation of the feature
> 
>      RichAnnotation ra = (RichAnnotation)rf.getAnnotation();
> 
> 
> 
>      //Create the required ComparableTerms
> 
>      ComparableTerm geneTerm =
> RichObjectFactory.getDefaultOntology().getOrCreateTerm("gene");
> 
>      ComparableTerm locusTerm =
> RichObjectFactory.getDefaultOntology().getOrCreateTerm("locus_tag");
> 
>      ComparableTerm productTerm =
> RichObjectFactory.getDefaultOntology().getOrCreateTerm("product");
> 
>      ComparableTerm synonymTerm =
> RichObjectFactory.getDefaultOntology().getOrCreateTerm("gene_synonym");
> 
>      ComparableTerm proteinIDTerm =
> RichObjectFactory.getDefaultOntology().getOrCreateTerm("protein_id");
> 
> 
> 
>      //Create empty strings
> 
>      String gene = "";
> 
>      String locus = "";
> 
>      String product = "";
> 
>      String geneSynonym = "";
> 
>      String proteinID = "";
> 
> 
> 
>      //Iterate through the notes in the annotation 
> 
>      for (Iterator <Note> it = ra.getNoteSet().iterator();
> it.hasNext();){
> 
>        Note note = it.next();
> 
> 
> 
>      //Check each note to see if it matches one of the required
> ComparableTerms
> 
>        if(note.getTerm().equals(locusTerm)){
> 
>          locus = note.getValue().toString();
> 
>        }
> 
>        if(note.getTerm().equals(productTerm)){
> 
>          product = note.getValue().toString();
> 
>        }
> 
>        if(note.getTerm().equals(geneTerm)){
> 
>          gene = note.getValue().toString();
> 
>        }
> 
>        if(note.getTerm().equals(synonymTerm)){
> 
>          geneSynonym = note.getValue().toString();
> 
>        }
> 
>        if(note.getTerm().equals(proteinIDTerm)){
> 
>          proteinID = note.getValue().toString();
> 
>        }
> 
>      }
> 
>      //Outout the feature information
> 
>      System.out.println(locus + "  " + gene + "  " + geneSynonym + "  "
> + proteinID + "  " + product +"  " + featureStrand +"  Chr1:" +
> featureLocation);
> 
>    }
> 
>  }
> 
> 
> 
>  public static void main(String args []){
> 
>    if (args.length != 1){
> 
>      System.out.println("Usage: java ExtractInformation <file in
> Genbank or EMBL format>");
> 
>      System.exit(1);
> 
>    }
> 
>    else {
> 
>      new ExtractInformation(args[0]);
> 
>    }
> 
>  }
> 
> }
> 
> [/CODE]
> 
> 
> 
> Dr. Jolyon Holdstock                                       
> Senior Computational Biologist,
> 
> Oxford Gene Technology,                                   
> Begbroke Science Park,                                     
> Sandy Lane, Yarnton,                                           
> Oxford, OX5 1PF, UK.                                             
> 
> T: +44 (0)1865 856 852                                     
> F: +44 (0)1865 842 116                                     
> E: jolyon.holdstock at ogt.co.uk <mailto:nicola.booton-mander at ogt.co.uk>
> 
> W: www.ogt.co.uk <blocked::http://www.ogt.co.uk/>     
> 
> 
> 
> Looking to outsource your microarray studies? Look no further.
> Click here to tour our facilities
> <http://www.ogt.co.uk/highthroughputservices.html> 
> 
> Click here to request a quotation
> <http://www.ogt.co.uk/htsquotationrequest.asp> 
> 
> 
> 
> Scientific pedigree delivering high quality microarray results to you:
> 
> *         Service capacity >1000 samples per week
> 
> *         Rigorous QC <http://www.ogt.co.uk/hts_qc.html>  from sample to
> result
> 
> *         Applications <http://www.ogt.co.uk/hts_apps.html>  available
> include aCGH, CNV, methylation studies and miRNA
> 
> 
> 
> Oxford Gene Technology (Operations) Ltd. Registered in England No:
> 03845432 Begbroke Science Park Sandy Lane Yarnton Oxford OX5 1PF  
> 
> Confidentiality Notice: The contents of this email from Oxford Gene
> Technology are confidential and intended solely for the person to whom
> it is addressed. It may contain privileged and confidential information.
> If you are not the intended recipient you must not read, copy,
> distribute, discuss or take any action in reliance on it. If you have
> received this email in error please advise the sender so that we can
> arrange for proper delivery. Then please delete the message from your
> inbox. Thank you.
> 
> 
> 
> 
> 
> _______________________________________________
> biojava-dev mailing list
> biojava-dev at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/biojava-dev

--
Richard Holland, BSc MBCS
Operations and Delivery Director, Eagle Genomics Ltd
T: +44 (0)1223 654481 ext 3 | E: holland at eaglegenomics.com
http://www.eaglegenomics.com/





More information about the biojava-dev mailing list