// Copyright 2008, by North Carolina State University. All rights reserved.

/**
 * Compile:
 * javac -cp  "C:\WorkData\javaCode\;C:\JavaDev\biojava-live\biojava-live.jar;C:\jars\hibernate3.jar;C:\jars\mysql-connector-java-3.1.13-bin.jar"  bioinformatics\biojavaTools\BriefLoader.java
 * 
 * Run:
 * java -Xmx100m -cp  "c:\jars\asm.jar;c:\jars\asm-attrs.jar;c:\jars\cglib-2.1.3.jar;c:\jars\jta.jar;c:\jars\antlr-2.7.6.jar;c:\jars\commons-collections-2.1.1.jar;c:\jars\commons-logging-1.0.4.jar;c:\jars\dom4j-1.6.1.jar;C:\WorkData\javaCode\;C:\JavaDev\biojava-live\biojava-live.jar;C:\jars\hibernate3.jar;C:\jars\mysql-connector-java-3.1.13-bin.jar"  -Djdbc.drivers=com.mysql.jdbc.Driver  bioinformatics.biojavaTools.BriefLoader C:\WorkData\genomes\M_grisea_genbank_v5\CH476760.gb 
 */
package bioinformatics.biojavaTools;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import org.biojava.bio.BioException;
import org.biojavax.*;
import org.biojavax.RichObjectFactory;
import org.biojavax.bio.db.biosql.BioSQLRichSequenceDB;
import org.biojavax.bio.seq.*;
import org.biojavax.bio.seq.RichSequence.*;
import org.biojavax.bio.seq.RichSequence.IOTools;
import org.biojavax.bio.seq.io.*;
import org.biojavax.bio.BioEntry;

import org.hibernate.*;
import org.hibernate.cfg.*;

//import java.util.logging.*;
//import org.apache.commons.logging.*;




/**
 * Command-line tool that reads GenBank flat files with BioJavaX and stores
 * every sequence they contain in a database through a Hibernate session.
 *
 * Usage: <code>BriefLoader [-v] file1.gb [file2.gb ...]</code>. Each argument
 * is either a file or a directory; for a directory the regular files directly
 * inside it are loaded (one level only, no recursion). The optional leading
 * <code>-v</code> switches on verbose progress output.
 *
 * A sequence that is already present (matched by name against the accession
 * of the sequence just read) is deleted before the replacement is saved.
 */
public class BriefLoader
  {
  /** Factory built once in {@link #main(String[])} and closed when it ends. */
  static SessionFactory sessionFactory;

  /** When true, progress messages are written to standard output. */
  static boolean verbose = false;

  /**
   * Program entry point: parses the arguments, configures Hibernate, opens
   * one session and loads every file named on the command line.
   *
   * @param args optional <code>-v</code> followed by one or more file or
   *             directory names; with no names the usage text is printed.
   */
  public static void main(String[] args)
    {
    int argPos = 0;
    if ( args.length > 0 && args[argPos].equals( "-v"))
      {
      verbose = true;
      argPos++;
      }
    if ( argPos == args.length)
      {
      System.out.println( "Usage: BriefLoader [-v] file1.gb [file2.gb ...]");
      return;
      }

    BriefLoader bf = new BriefLoader();

    // logging:
    // I want to suppress the verbose info messages.
    // After some investigation, it seems that commons
    // logging uses a default of Jdk14Logger (the standard java logger).
    // Thus, I can control the commons logger via the java.util.logging API.
    java.util.logging.Logger jdklogger = java.util.logging.Logger.getLogger("org.hibernate");
    jdklogger.setLevel(java.util.logging.Level.WARNING);

    // Configuration is very expensive and can only be done once.
    //
    // properties can be specified via the file hibernate.properties located
    // on the classpath,
    // on the command line via -D,
    // via the default "hibernate.cfg.xml" file in the current working directory,
    // via an explicitly identified file, e.g. "specific_file.cfg.xml",
    // via explicit calls to setProperty. The same goes for resources.
    // Later definitions override earlier ones.
    sessionFactory = new Configuration()
        // Note:
        // specifying the resource location of the configuration file
        // works ONLY if I also modify the
        // hibernate.cfg.xml file to add the class' package as a prefix
        // for every resource. Hibernate does not understand the
        // concept of relative locations based on the location of the
        // configuration file. Nonetheless, this allows me to organize the
        // mapping files based on java packages.
        .configure( bf.getClass().getResource( "hibernate.cfg.xml"))
        // the following should allow use of a generic configuration file
        // by overriding the items that need to be customized.
        //.setProperty( "connection.url", "jdbc:mysql://someMachine.ncsu.edu:3306/M_grisea_genbank_biosql")
        //.setProperty( "connection.username", "aUser")
        //.setProperty( "connection.password", "aPassword")
        //
        // The following can be used to adjust memory consumption.
        //.setProperty( "hibernate.jdbc.batch_size", "20")
        //.setProperty( "hibernate.cache.use_second_level_cache", "false")
        //
        // examining the generated SQL can be informative...
        //.setProperty( "hibernate.show_sql", "true")

        // Go and make the support interfaces...
        .buildSessionFactory();

    Session session = null;
    try
      {
      session = bf.doSessionFactoryBindings( sessionFactory);

      while ( argPos < args.length)
        {
        File f = new File( args[argPos++]);

        // handle one level of directories
        if ( f.isDirectory())
          {
          File contents[] = f.listFiles();
          // listFiles() returns null when the directory cannot be read.
          if ( contents == null)
            {
            System.err.println( "Unable to list directory " + f);
            continue;
            }
          for ( int j=contents.length-1;j>=0;j--)
            {
            // only one level is handled, so nested directories are skipped
            // instead of being handed to the file reader.
            if ( contents[j].isFile())
              bf.loadNSave( session, contents[j]);
            }
          }
        else
          {
          // a directory itself is never a loadable file, hence the else.
          bf.loadNSave( session, f);
          }
        }
      }
    finally
      {
      try
        {
        if ( session != null)
          {
          session.flush();  // force in-memory to disk.
          session.close();  // only for local sessions
          }
        }
      finally
        {
        // close the factory even when flushing or closing the session fails.
        sessionFactory.close();
        }
      }
    }

  /**
   *  The session is the primary interaction layer between Hibernate and
   * the underlying database. Closely allied with that is a suite of
   * Biojavax classes handling the load and save operations. These classes
   * are coordinated through the RichObjectFactory. Thus, correctly
   * setting up and using the factory for all object operations is
   * critical!
   *
   * @param sessionFactory factory from which the session is opened.
   * @return the newly opened session, already connected to the
   *         RichObjectFactory; the caller is responsible for closing it.
   */
  Session doSessionFactoryBindings( SessionFactory sessionFactory)
    {
    if ( verbose) System.out.println( "doing bindings.");
    // open a session before processing the files. This allows the
    // session to survive across the multiple transactions. And, hopefully,
    // to provide level-2 caching services for the objects....
    // Contrast with getCurrentSession which only survives for the current
    // transaction.
    Session session = sessionFactory.openSession();

    // Change DefaultNamespaceName binding from "biojavax"
    RichObjectFactory.setDefaultNamespaceName( "genbankBiosqlRich");

    // Change DefaultOntologyName binding from "lcl". Hopefully, this will
    // map the genbank annotation information into the SO term space. This
    // works for some, but not all features. Those which do not map are
    // flagged as "auto-generated by biojavax".
    // nota bene: may be dangerous if semantics are not exactly the same!
    // examination of the SO mapping indicated that where the terms align, the
    // semantics also align.
    // Benefit is that semantic queries can be performed on the loaded info.
    RichObjectFactory.setDefaultOntologyName( "sequence"); //"SO" );


    // Hook to BiojavaX and BioSQL. This sets up the proper conditions for
    // transparently hooking to the database and supplying objects from
    // that db if they exist within it. This is accomplished via hooking the
    // Builder, Resolver, and Handler to BioSQL implementations.
    // Establishes standard bindings for:
    //    RichObjectBuilder,
    //    DefaultCrossReferenceResolver,
    //    DefaultRichSequenceHandler.
    // nb: PositionResolver is left as new AverageResolver();
    RichObjectFactory.connectToBioSQL(session);

    // also grab a reference to the underlying database (so that I can use
    // the convenience wrapper methods for deleting entries).
    //db = new BioSQLRichSequenceDB( session);   // create the RichSequenceDB wrapper around the Hibernate session

    return session;
    }

  /**
   * Loads every sequence of one GenBank file and saves it through the given
   * session, one transaction per sequence.
   *
   * This works for genbank files containing multiple sequences.
   * Original concept from: http://portal.open-bio.org/pipermail/biojava-l/2007-April/005824.html
   * An entry whose name equals the accession of the sequence just read is
   * deleted (and that delete committed) before the new sequence is saved.
   *
   * Errors are reported with a stack trace and are not rethrown: a
   * HibernateException discards the current sequence and continues with the
   * next one, a BioException abandons the rest of the file, and a missing or
   * unreadable file is skipped. In each case the open transaction is rolled
   * back.
   *
   * @param session  open Hibernate session, already bound to the
   *                 RichObjectFactory; must not be null.
   * @param fileName the GenBank file to read.
   * @throws Error if <code>session</code> is null.
   */
  public void loadNSave( Session session, File fileName)
    {
    // ensure that an acceptable session configuration exists.
    if ( session == null) throw new Error( "session object not established");

    // Note the retrieval of namespace VIA the factory. The interface
    // documentation did not make clear the requirement to use the
    // established 'singleton' object (via getDefaultNamespace or equivalent).
    // The underlying hibernate code does not attempt to automatically ensure
    // the uniqueness (singleton) by attempting a load of any instances.
    // Failure to use the factory will result in attempts to create a
    // duplicate namespace in the database.
    org.biojavax.Namespace ns = RichObjectFactory.getDefaultNamespace();

    Transaction tx = null;
    BufferedReader br = null;
    try
      {
      if ( verbose) System.out.println( "*********** Loading "+fileName+"...");
      br = new BufferedReader( new FileReader(  fileName) );

      // readGenbankDNA loads the objects from the stream and uses the
      // established factory(ies) and defaults for object creation.
      if ( verbose) System.out.println( "*********** readGenbankDNA...");
      RichSequenceIterator rsi = RichSequence.IOTools.readGenbankDNA( br, ns);

      while ( rsi.hasNext() ) // for each sequence in the file...
        {
        if ( verbose) System.out.println( "*********** start transaction.");
        // Hibernate seems to REQUIRE transactions when objects are modified.
        tx = session.beginTransaction();

        if ( verbose) System.out.println( "*********** Loading next sequence...");
        RichSequence sequence = rsi.nextRichSequence();
        System.out.println( "loaded sequence "+sequence.getAccession()+
          ", identifier: "+ sequence.getIdentifier());

        try
          {
          // delete any extant sequence from the database. The cascaded
          // constraints will result in removal of all associated features
          // and what-not.
          Query q = session.createQuery( "from BioEntry as s where s.name = :acc");
          q.setString( "acc", sequence.getAccession());
          BioEntry be = (BioEntry)q.uniqueResult();
          if ( be != null)
            {
            if ( verbose) System.out.println( "*********** DELETING extant sequence...");
            // Interestingly, hibernate does not seem to do transactions in the
            // same sense that a database would. Thus, I need to commit the
            // delete operation before I attempt to insert the replacement
            // information.
            session.delete( be);
            tx.commit();
            tx = session.beginTransaction();
            }

          // Loading entire genomes from genbank consumes large amounts
          // of memory. Thus, each sequence and its associated
          // annotations are wrapped in a transaction, the transaction saved,
          // and the in-memory cache is cleared. While being somewhat
          // inefficient, this approach does limit memory consumption.
          if ( verbose) System.out.println( "*********** saving...");

          // synchronize in-memory representation w/ the database
          session.saveOrUpdate( "Sequence", sequence );
          if ( verbose) System.out.println( "*********** comitting...");
          tx.commit();    // save to database - does an automatic flush
          // batch operations overwhelm the hibernate cache - clear it out!
          if ( verbose) System.out.println( "*********** flushing...");
          session.flush();  // force in-memory to disk.
          if ( verbose) System.out.println( "*********** clearing...");
          session.clear();  // clean out cache.
          }
        catch (HibernateException ex)
          {
          // discard the sequence and all its annotations
          rollbackQuietly( tx);
          ex.printStackTrace();
          }
        }
      }
    catch (FileNotFoundException ex)
      {
      ex.printStackTrace();
      }
    catch ( BioException bex)
      {
      // parsing failed after the transaction was begun: do not leave it open.
      rollbackQuietly( tx);
      bex.printStackTrace();
      }
    finally
      {
      if ( br != null)
        {
        try
          {
          br.close();
          }
        catch (IOException ignored)
          {
          // nothing useful can be done if closing the input file fails.
          }
        }
      }
    }

  /**
   * Rolls back the given transaction unless it is null or has already been
   * committed or rolled back. A failure of the rollback itself is reported
   * with a stack trace rather than thrown, so it cannot hide the error that
   * triggered the rollback.
   */
  private static void rollbackQuietly( Transaction tx)
    {
    if ( tx == null) return;
    try
      {
      if ( !tx.wasCommitted() && !tx.wasRolledBack())
        tx.rollback();
      }
    catch (HibernateException ex)
      {
      ex.printStackTrace();
      }
    }

  }

