package ca.gc.phac.aspc.nml;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.biojava.bio.chromatogram.Chromatogram;
import org.biojava.bio.chromatogram.UnsupportedChromatogramFormatException;
import org.biojava.bio.program.abi.ABIFChromatogram;
import org.biojava.bio.program.scf.SCF;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.symbol.Alignment;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.IntegerAlphabet;
import org.biojava.bio.symbol.SimpleAlignment;
import org.biojava.bio.symbol.SimpleSymbolList;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.symbol.SymbolList;
import org.biojava.bio.symbol.IntegerAlphabet.IntegerSymbol;
import org.biojava.bio.symbol.IntegerAlphabet.SubIntegerAlphabet;

/**
 * Static helpers for turning ABIF chromatogram data into the form needed to
 * write it out as SCF: SCF-style comments, per-base probability lists, and the
 * symbol/character lookup tables used while building them.
 */
public class SCFUtils {

	/** the mapping of symbols to printable characters */
	public static final Map<Symbol, Character> SYMBOL_MAPPING;
	/** the mapping of printable characters to ambiguity sets */
	public static final Map<Character, Set<Symbol>> AMBIGUITY_SYMBOLS;

	/** integer sub-alphabet covering 0 to 255, used for probability values */
	public static final SubIntegerAlphabet PROBABILITY_ALPHABET = IntegerAlphabet
			.getSubAlphabet(0, 255);

	// Only the patterns are shared. SimpleDateFormat instances are not safe
	// for concurrent use, so a fresh formatter is created per call instead of
	// keeping static instances.
	private static final String SCF_COMMENT_RUND_PATTERN = "yyyyMMdd.HHmmss";
	private static final String SCF_COMMENT_DATE_PATTERN = "EEE dd MMM HH:mm:ss yyyy";

	/** the base-call characters that get their own probability list */
	private static final char[] CALLED_BASES = { 'A', 'C', 'G', 'T', 'N' };

	static {
		// order matters: the symbol map is derived from the ambiguity sets
		AMBIGUITY_SYMBOLS = populateAmbiguitySymbols();
		SYMBOL_MAPPING = populateSymbolMap();
	}

	/**
	 * Given the {@link ExtendedABIFParser} instance, we will try to assign
	 * appropriate scf comments using the ABI comments.
	 * 
	 * Every value is stored with {@link Properties#put(Object, Object)}, which
	 * rejects <code>null</code>, so a parser getter returning
	 * <code>null</code> results in a {@link NullPointerException}.
	 * 
	 * @param c
	 *            the parser to use the comments from
	 * @return the comments appropriately formatted for scf
	 */
	public static Properties prepareComments(ExtendedABIFParser c) {
		SimpleDateFormat rundFormat = new SimpleDateFormat(
				SCF_COMMENT_RUND_PATTERN);
		SimpleDateFormat dateFormat = new SimpleDateFormat(
				SCF_COMMENT_DATE_PATTERN);

		Properties comments = new Properties();

		comments.put("VER1", c.getDataCollectionSoftwareVersion());
		comments.put("LANE", c.getCapillaryNumber().toString());
		comments.put("BCAL", c.getBasecallerBcpDll());
		comments.put("COMM", c.getComment(1));
		comments.put("SPAC", c.getAvgPeakSpacing().toString());
		comments.put("MACH", c.getInstrumentNameAndSerialNumber());
		comments.put("VER2", c.getBasecallerVersion());
		comments.put("DYEP", c.getMobilityFileOrig());
		comments.put("NAME", c.getSampleName());

		// The signal levels are re-ordered for scf:
		// abi looks like: 70 73 90 74
		// scf looks like: A=73,C=74,G=70,T=90
		Short[] signalLevels = c.getSignalLevel();
		comments.put("SIGN", "A=" + signalLevels[1].toString() + ",C="
				+ signalLevels[3].toString() + ",G="
				+ signalLevels[0].toString() + ",T="
				+ signalLevels[2].toString());

		// The model number loses its first character:
		// abi looks like: 3730
		// scf looks like: 730
		comments.put("MODL", c.getModelNumber().substring(1));

		comments.put("RUND", rundFormat.format(c.getRunStartDateTime())
				+ " - " + rundFormat.format(c.getRunStopDateTime()));
		comments.put("DATE", dateFormat.format(c.getRunStartDateTime())
				+ " to " + dateFormat.format(c.getRunStopDateTime()));

		return comments;
	}

	/**
	 * Prepares an {@link ABIFChromatogram} for writing out in SCF format.
	 * 
	 * The file is read twice, once as a chromatogram and once through
	 * {@link ExtendedABIFParser}, and the base call alignment of the
	 * chromatogram is replaced by one that additionally carries the SCF
	 * probability lists. The quality value of each called base is stored in the
	 * list of that base; all other entries stay 0. Positions whose call is not
	 * one of A, C, G, T or N (a gap or an ambiguity code, for instance), or for
	 * which no quality value is available, keep 0 in every list.
	 * 
	 * @param f
	 *            the ABIF file that will be written in SCF format
	 * @return the chromatogram prepared to be written in SCF format
	 * @throws IOException
	 *             if an I/O error occurs
	 * @throws UnsupportedChromatogramFormatException
	 *             if this is not, in fact, an ABIFChromatogram file
	 * @throws IllegalStateException
	 *             if the supplemented alignment cannot be built or cannot be
	 *             installed on the chromatogram
	 */
	public static Chromatogram prepareABIFChromatogram(File f)
			throws UnsupportedChromatogramFormatException, IOException {
		// load the file both as a chromatogram and as a parser so that we
		// have access to all fields
		ABIFChromatogram c = ABIFChromatogram.create(f);
		ExtendedABIFParser p = new ExtendedABIFParser(f);

		Alignment align = c.getBaseCalls();
		SymbolList dna = align.symbolListForLabel(Chromatogram.DNA);
		char[] qualityValues = p.getEditedQualityValues();
		int baseCount = dna.length();

		// All of the symbol lists need to be the same size as the DNA symbol
		// list, so every one of them starts out filled with zeros and only
		// the entry of the called base is overwritten below. 'N' gets a list
		// as well, although that list is never written to the alignment.
		Map<Character, List<Symbol>> probabilitySymbols = new HashMap<Character, List<Symbol>>();
		for (char base : CALLED_BASES) {
			probabilitySymbols.put(base, zeroProbabilities(baseCount));
		}
		List<Symbol> emptySymbolList = zeroProbabilities(baseCount);

		@SuppressWarnings("unchecked")
		Iterator<Symbol> symbolIterator = dna.iterator();
		for (int i = 0; symbolIterator.hasNext(); i++) {
			Symbol s = symbolIterator.next();
			List<Symbol> target = probabilitySymbols
					.get(SYMBOL_MAPPING.get(s));
			if (target == null || qualityValues == null
					|| i >= qualityValues.length) {
				// no list for this call, or no quality value for this
				// position: leave the zeros in place
				continue;
			}
			try {
				IntegerSymbol probabilitySymbol = PROBABILITY_ALPHABET
						.getSymbol(qualityValues[i]);
				target.set(i, probabilitySymbol);
			} catch (IllegalSymbolException e) {
				// quality value outside of the probability alphabet; report
				// it and keep 0 for this position
				e.printStackTrace();
			}
		}

		// now prepare the symbol lists for writing back into the chromatogram
		// file. we are just filling the overcall, substitution and undercall
		// probabilities with zeros here. we might want to investigate
		// actually filling this with information from the abi chromatogram
		// if that data is available.
		Map<String, SymbolList> symbolListMap = new HashMap<String, SymbolList>();
		symbolListMap.put(Chromatogram.DNA.toString(), dna);
		symbolListMap.put(Chromatogram.OFFSETS.toString(), align
				.symbolListForLabel(Chromatogram.OFFSETS));
		symbolListMap.put(SCF.PROB_NUC_A.toString(), symbolListFromMap('A',
				probabilitySymbols));
		symbolListMap.put(SCF.PROB_NUC_C.toString(), symbolListFromMap('C',
				probabilitySymbols));
		symbolListMap.put(SCF.PROB_NUC_G.toString(), symbolListFromMap('G',
				probabilitySymbols));
		symbolListMap.put(SCF.PROB_NUC_T.toString(), symbolListFromMap('T',
				probabilitySymbols));
		try {
			symbolListMap.put(SCF.PROB_OVERCALL.toString(),
					new SimpleSymbolList(PROBABILITY_ALPHABET,
							emptySymbolList));
			symbolListMap.put(SCF.PROB_SUBSTITUTION.toString(),
					new SimpleSymbolList(PROBABILITY_ALPHABET,
							emptySymbolList));
			symbolListMap.put(SCF.PROB_UNDERCALL.toString(),
					new SimpleSymbolList(PROBABILITY_ALPHABET,
							emptySymbolList));
		} catch (IllegalSymbolException e) {
			// the lists only hold symbols taken from PROBABILITY_ALPHABET
			throw new IllegalStateException(
					"could not build the empty probability lists", e);
		}

		Alignment supplementedAlignment = new SimpleAlignment(symbolListMap);
		hackABIFChromatogramAndSetBaseCallAlignment(c, supplementedAlignment);

		return c;
	}

	/**
	 * Creates a list holding <code>length</code> times the probability symbol
	 * for 0.
	 * 
	 * @param length
	 *            the number of entries to create
	 * @return a new, modifiable list of zero symbols
	 */
	private static List<Symbol> zeroProbabilities(int length) {
		List<Symbol> zeros = new ArrayList<Symbol>(length);
		try {
			Symbol zero = PROBABILITY_ALPHABET.getSymbol(0);
			for (int j = 0; j < length; j++) {
				zeros.add(zero);
			}
		} catch (IllegalSymbolException e) {
			// 0 lies inside the 0..255 range the alphabet was created with
			throw new IllegalStateException(
					"0 is not part of the probability alphabet", e);
		}
		return zeros;
	}

	/**
	 * This is really ugly and bad. I don't deny that. This will probably not
	 * work on system where the security manager has been enabled and set up.
	 * 
	 * The first method named <code>setBaseCallAlignment</code> found on the
	 * class of the chromatogram or one of its superclasses is made accessible,
	 * invoked, and made inaccessible again.
	 * 
	 * @param c
	 *            the chromatogram that we should set the alignment for
	 * @param baseCallAlignment
	 *            the alignment we should set
	 * @throws IllegalStateException
	 *             if no such method exists or invoking it fails
	 */
	private static void hackABIFChromatogramAndSetBaseCallAlignment(
			ABIFChromatogram c, Alignment baseCallAlignment) {
		// yikes. really bad hack. we're making a non-public method accessible
		// so that we can call it ourselves. the whole hierarchy is searched
		// so that this keeps working when c is a subclass instance.
		Method setter = null;
		for (Class<?> type = c.getClass(); type != null && setter == null; type = type
				.getSuperclass()) {
			for (Method candidate : type.getDeclaredMethods()) {
				if (candidate.getName().equals("setBaseCallAlignment")) {
					setter = candidate;
					break;
				}
			}
		}
		if (setter == null) {
			throw new IllegalStateException(
					"no setBaseCallAlignment method found on "
							+ c.getClass().getName());
		}

		setter.setAccessible(true);
		try {
			setter.invoke(c, baseCallAlignment);
		} catch (IllegalAccessException e) {
			throw new IllegalStateException(
					"could not access setBaseCallAlignment", e);
		} catch (InvocationTargetException e) {
			throw new IllegalStateException(
					"setBaseCallAlignment rejected the alignment", e
							.getCause());
		} finally {
			// clean up after ourselves, whatever happened
			setter.setAccessible(false);
		}
	}

	/**
	 * Copies the symbols stored under <code>key</code> into a new
	 * {@link SymbolList} over {@link #PROBABILITY_ALPHABET}.
	 * 
	 * @param key
	 *            the key whose list should be converted; must be present
	 * @param map
	 *            the map to read the list from
	 * @return the symbols of the list as a symbol list
	 */
	private static <T> SymbolList symbolListFromMap(T key,
			Map<T, List<Symbol>> map) {
		List<Symbol> symbols = map.get(key);
		int size = symbols.size();
		return new SimpleSymbolList(symbols.toArray(new Symbol[size]), size,
				PROBABILITY_ALPHABET);
	}

	/**
	 * Collects the given symbols into a new set.
	 * 
	 * @param members
	 *            the symbols the set should contain
	 * @return a new set holding the symbols
	 */
	private static Set<Symbol> symbolSet(Symbol... members) {
		Set<Symbol> set = new HashSet<Symbol>();
		for (Symbol member : members) {
			set.add(member);
		}
		return set;
	}

	/**
	 * Builds the table of two and three base IUPAC ambiguity codes.
	 * 
	 * @return the mapping of ambiguity code to the bases it stands for
	 */
	private static Map<Character, Set<Symbol>> populateAmbiguitySymbols() {
		Map<Character, Set<Symbol>> ambiguitySymbols = new HashMap<Character, Set<Symbol>>();

		// two base codes
		ambiguitySymbols.put('Y', symbolSet(DNATools.c(), DNATools.t()));
		ambiguitySymbols.put('R', symbolSet(DNATools.a(), DNATools.g()));
		ambiguitySymbols.put('W', symbolSet(DNATools.a(), DNATools.t()));
		ambiguitySymbols.put('S', symbolSet(DNATools.c(), DNATools.g()));
		ambiguitySymbols.put('M', symbolSet(DNATools.c(), DNATools.a()));
		ambiguitySymbols.put('K', symbolSet(DNATools.g(), DNATools.t()));

		// three base codes: each one names the single base it excludes
		ambiguitySymbols.put('H', symbolSet(DNATools.c(), DNATools.t(),
				DNATools.a())); // not G
		ambiguitySymbols.put('B', symbolSet(DNATools.c(), DNATools.g(),
				DNATools.t())); // not A
		ambiguitySymbols.put('V', symbolSet(DNATools.c(), DNATools.g(),
				DNATools.a())); // not T
		ambiguitySymbols.put('D', symbolSet(DNATools.a(), DNATools.g(),
				DNATools.t())); // not C

		return ambiguitySymbols;
	}

	/**
	 * Builds the symbol to character table for the four bases, N, the gap
	 * symbol and every code in {@link #AMBIGUITY_SYMBOLS}, which has to be
	 * populated before this method is called.
	 * 
	 * @return the mapping of symbol to printable character
	 * @throws IllegalStateException
	 *             if the DNA alphabet rejects one of the ambiguity sets
	 */
	private static Map<Symbol, Character> populateSymbolMap() {
		Map<Symbol, Character> symbolMap = new HashMap<Symbol, Character>();
		FiniteAlphabet dna = DNATools.getDNA();

		symbolMap.put(DNATools.a(), 'A');
		symbolMap.put(DNATools.c(), 'C');
		symbolMap.put(DNATools.g(), 'G');
		symbolMap.put(DNATools.t(), 'T');
		symbolMap.put(DNATools.n(), 'N');
		symbolMap.put(dna.getGapSymbol(), '-');

		// every ambiguity set is distinct, so the iteration order of the map
		// has no influence on the result
		for (Map.Entry<Character, Set<Symbol>> code : AMBIGUITY_SYMBOLS
				.entrySet()) {
			try {
				symbolMap.put(dna.getAmbiguity(code.getValue()), code
						.getKey());
			} catch (IllegalSymbolException e) {
				// a silently incomplete table would mislabel base calls
				// later on, so fail loudly instead
				throw new IllegalStateException(
						"could not resolve the ambiguity symbol for '"
								+ code.getKey() + "'", e);
			}
		}

		return symbolMap;
	}
}
