]> git.donarmstrong.com Git - mothur.git/blobdiff - makelookupcommand.cpp
added make.lookup command.
[mothur.git] / makelookupcommand.cpp
diff --git a/makelookupcommand.cpp b/makelookupcommand.cpp
new file mode 100644 (file)
index 0000000..59e26c7
--- /dev/null
@@ -0,0 +1,593 @@
+//
+//  makelookupcommand.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 5/14/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "makelookupcommand.h"
+
+//**********************************************************************************************************************
+vector<string> MakeLookupCommand::setParameters(){
+       try {
+               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+        CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow);
+        CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors);
+        CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode);
+               CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey);
+        CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold);
+        CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string MakeLookupCommand::getHelpString(){
+       try {
+               string helpString = "";
+               helpString += "The make.lookup command allows you to create custom lookup files for use with shhh.flows.\n";
+               helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n";
+               helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n";
+        helpString += "The flow parameter is used to provide the flow data. It is required.\n";
+        helpString += "The error parameter is used to provide the error summary. It is required.\n";
+        helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
+        helpString += "The key parameter is used to provide the key sequence. Default=TACG.\n";
+        helpString += "The threshold parameter is ....\n";
+        helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
+               helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
+               helpString += "new(...)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string MakeLookupCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "lookup") {  pattern = "[filename],lookup"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeLookupCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+MakeLookupCommand::MakeLookupCommand(){
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["lookup"] = tempOutNames; 
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+MakeLookupCommand::MakeLookupCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+            vector<string> tempOutNames;
+            outputTypes["lookup"] = tempOutNames; 
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                string path;
+                               it = parameters.find("flow");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["flow"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("error");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["error"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("reference");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["reference"] = inputDir + it->second;                }
+                               }
+                
+            }
+                        
+                       //check for parameters
+            errorFileName = validParameter.validFile(parameters, "error", true);
+                       if (errorFileName == "not open") { errorFileName = ""; abort = true; }
+                       else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required."); m->mothurOutEndLine();  abort = true; }
+                       
+                       flowFileName = validParameter.validFile(parameters, "flow", true);
+                       if (flowFileName == "not open") { flowFileName = ""; abort = true; }
+                       else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required."); m->mothurOutEndLine();  abort = true; }
+                       else {   m->setFlowFile(flowFileName);  }
+                       
+                       refFastaFileName = validParameter.validFile(parameters, "reference", true);
+                       if (refFastaFileName == "not open") { abort = true; }
+                       else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required."); m->mothurOutEndLine();  abort = true; }
+                      
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(flowFileName); //if user entered a file with a path then preserve it
+                       }
+            
+            string temp = validParameter.validFile(parameters, "threshold", false);    if (temp == "not found"){       temp = "10000"; }
+                       m->mothurConvert(temp, thresholdCount);
+            
+            barcodeSequence = validParameter.validFile(parameters, "barcode", false);  if (barcodeSequence == "not found"){    barcodeSequence = "AACCGTGTC";  }
+            
+            keySequence = validParameter.validFile(parameters, "key", false);  if (keySequence == "not found"){        keySequence = "TCAG";   }
+            
+            temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){    temp = "A";     }
+            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+            }
+            else {
+                if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
+                else if(toupper(temp[0]) == 'B'){
+                    flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
+                else if(toupper(temp[0]) == 'I'){
+                    flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
+                else {
+                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+                }
+            }
+                       
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MakeLookupCommand::execute(){
+       try {
+        
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        cout.setf(ios::fixed, ios::floatfield);
+        cout.setf(ios::showpoint);
+        
+        double gapOpening = 10;
+        int maxHomoP = 101;
+        vector<vector<double> > penaltyMatrix(maxHomoP);
+        for(int i=0;i<maxHomoP;i++){
+            penaltyMatrix[i].resize(maxHomoP, 5);
+            penaltyMatrix[i][i] = 0;
+        }
+        
+        //Create flows for reference sequences...
+        ifstream refFASTA;
+        m->openInputFile(refFastaFileName, refFASTA);                        //  *   open reference sequence file
+        map<string, vector<double> > refFlowgrams;
+        
+        while(!refFASTA.eof()){
+            if (m->control_pressed) { refFASTA.close(); return 0; }
+            Sequence seq(refFASTA);  m->gobble(refFASTA);
+            
+            string fullSequence = keySequence + barcodeSequence + seq.getAligned(); //  *   concatenate the keySequence, barcodeSequence, and
+            //      referenceSequences
+            refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); //  *   translate concatenated sequences into flowgram
+        }
+        refFASTA.close();
+        
+        vector<vector<double> > lookupTable(1000);
+        for(int i=0;i<1000;i++){
+            lookupTable[i].resize(11, 0);
+        }
+        
+        
+        //Loop through each sequence in the flow file and the error summary file.
+        ifstream flowFile;
+        m->openInputFile(flowFileName, flowFile);
+        int numFlows;
+        flowFile >> numFlows;
+        
+        ifstream errorFile;
+        m->openInputFile(errorFileName, errorFile);
+        m->getline(errorFile); //grab headers
+        
+        string errorQuery, flowQuery, referenceName, dummy;
+        string chimera;
+        float intensity;
+        
+        vector<double> std(11, 0);
+        
+        while(errorFile && flowFile){
+            
+            if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
+            
+            //  * if it's chimeric, chuck it
+            errorFile >> errorQuery >> referenceName;
+            for(int i=2;i<40;i++){
+                errorFile >> dummy;
+            }
+            errorFile >> chimera;
+           
+            
+            if(chimera == "2"){
+                m->getline(flowFile);
+            }
+            else{
+                
+                flowFile >> flowQuery >> dummy;
+                if(flowQuery != errorQuery){    cout << flowQuery << " != " << errorQuery << endl;  }
+                
+                vector<double> refFlow = refFlowgrams[referenceName];       //  * compare sequence to its closest reference
+                
+                vector<double> flowgram(numFlows);
+                
+                for(int i=0;i<numFlows;i++){
+                    flowFile >> intensity;
+                    flowgram[i] = intensity;// (int)round(100 * intensity);
+                }
+                m->gobble(flowFile);
+                
+                alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
+                
+                if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
+                
+                for(int i=0;i<flowgram.size();i++){
+                    int count = (int)round(100*flowgram[i]);
+                    if(count > 1000){count = 999;}
+                    if(abs(flowgram[i]-refFlow[i])<=0.50){
+                        lookupTable[count][int(refFlow[i])]++;               //  * build table
+                        std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
+                    }
+                }
+                
+            }
+            m->gobble(errorFile);
+            m->gobble(flowFile);
+        }
+        errorFile.close(); flowFile.close();
+        
+        //get probabilities
+        vector<int> counts(11, 0);
+        int totalCount = 0;
+        for(int i=0;i<1000;i++){
+            for(int j=0;j<11;j++){
+                counts[j] += lookupTable[i][j];
+                totalCount += lookupTable[i][j];
+            }
+        }
+        
+        int N = 11;
+        for(int i=0;i<11;i++){
+            if(counts[i] < thresholdCount){ N = i; break; }  //bring back
+            std[i] = sqrt(std[i]/(double)(counts[i]));  //bring back
+        }
+        
+        regress(std, N);  //bring back
+        
+        if (m->control_pressed) { return 0; }
+        
+        double minProbability = 0.1 / (double)totalCount;
+        
+        //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low
+        double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5);
+        
+        for(int i=0;i<1000;i++){
+            if (m->control_pressed) { return 0; }
+            
+            for(int j=0;j<N;j++){
+                if(lookupTable[i][j] == 0){
+                    lookupTable[i][j] = 1;  //bring back
+                }
+                lookupTable[i][j] = -log(lookupTable[i][j]/double(counts[j]));  //bring back
+            }
+            
+            for(int j=N;j<11;j++){  //bring back
+                double normalProbability = 1.0/((double)std[j] * sqrtTwoPi) * exp(-double(i - j*100)* double(i - j*100)/ double(2*std[j]*std[j]));
+                if(normalProbability > minProbability){
+                    lookupTable[i][j] = -log(normalProbability);
+                }
+                else{
+                    lookupTable[i][j] = -log(minProbability);
+                }
+            }
+        }
+        
+        
+        //calculate the probability of each homopolymer length
+        vector<double> negLogHomoProb(11, 0.00);  //bring back
+        for(int i=0;i<N;i++){
+            negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
+        }
+        regress(negLogHomoProb, N);
+        
+        if (m->control_pressed) { return 0; }
+        
+        //output data table.  column one is the probability of each homopolymer length
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
+               string outputFile = getOutputFileName("lookup",variables);
+               outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile);
+        
+        ofstream lookupFile;
+        m->openOutputFile(outputFile, lookupFile);
+        lookupFile.precision(8);
+        
+        for(int j=0;j<11;j++){
+            //        lookupFile << counts[j];
+            lookupFile << showpoint << negLogHomoProb[j]; //bring back
+            for(int i=0;i<1000;i++){
+                lookupFile << '\t' << lookupTable[i][j];
+            }
+            lookupFile << endl;
+        }
+        lookupFile.close();
+        
+        m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n");
+         
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "execute");
+               exit(1);
+       }
+}
+//******************************************************************************************************************************
+
+vector<double> MakeLookupCommand::convertSeqToFlow(string sequence, string order){
+    try {
+        int seqLength = (int)sequence.length();
+        int numFlows = (int)order.length();
+        vector<double> flowgram;
+        
+        int orderIndex = 0;
+        int sequenceIndex = 0;
+        
+        while(orderIndex < numFlows && sequenceIndex < seqLength){
+            
+            if (m->control_pressed) { return flowgram; }
+            
+            int homopolymerLength = 1;
+            
+            char base = sequence[sequenceIndex];
+            
+            while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){
+                homopolymerLength++;
+                sequenceIndex++;
+            }
+            
+            sequenceIndex++;
+            
+            for(int i=orderIndex; i<orderIndex+numFlows;i++){
+                if(order[i%numFlows] == base){
+                    //flowgram[i] = homopolymerLength;
+                    orderIndex = i%numFlows;
+                    break;
+                }else {  flowgram.push_back(0); }
+            }
+            
+            //flowgram[orderIndex] = homopolymerLength;
+            flowgram.push_back(homopolymerLength);
+            
+            orderIndex++;
+            orderIndex = orderIndex % numFlows;
+        }
+        
+        return flowgram;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "convertSeqToFlow");
+               exit(1);
+       }
+}
+//******************************************************************************************************************************
+
+int MakeLookupCommand::alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder){
+    try {
+        int numQueryFlows = (int)flowgram.size();
+        int numRefFlows = (int)refFlow.size();
+        
+            //cout << numQueryFlows << '\t' << numRefFlows << endl;
+        
+        vector<vector<double> > scoreMatrix(numQueryFlows+1);
+        vector<vector<char> > directMatrix(numQueryFlows+1);
+        
+        for(int i=0;i<=numQueryFlows;i++){
+            if (m->control_pressed) { return 0; }
+            scoreMatrix[i].resize(numRefFlows+1, 0.00);
+            directMatrix[i].resize(numRefFlows+1, 'x');
+            
+            scoreMatrix[i][0] = i * gapOpening;
+            directMatrix[i][0] = 'u';
+        }
+        
+            //cout << numQueryFlows << '\t' << numRefFlows << endl;
+        
+        
+        for(int i=0;i<=numRefFlows;i++){
+            scoreMatrix[0][i] = i * gapOpening;
+            directMatrix[0][i] = 'l';
+        }
+        
+        for(int i=1;i<=numQueryFlows;i++){
+            for(int j=1;j<=numRefFlows;j++){
+                if (m->control_pressed) { return 0; }
+                double diagonal = 1000000000;
+                if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){
+                    diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]];
+                }
+                double up = scoreMatrix[i-1][j] + gapOpening;
+                double left = scoreMatrix[i][j-1] + gapOpening;
+                
+                double minScore = diagonal;
+                char direction = 'd';
+                
+                if(left < diagonal && left < up){
+                    minScore = left;
+                    direction = 'l';
+                }
+                else if(up < diagonal && up < left){
+                    minScore = up;
+                    direction = 'u';
+                }
+                
+                scoreMatrix[i][j] = minScore;
+                directMatrix[i][j] = direction;
+                
+            }
+        }
+        
+        int minRowIndex = numQueryFlows;
+        double minRowScore = scoreMatrix[numQueryFlows][numRefFlows];
+        for(int i=0;i<numQueryFlows;i++){
+            if (m->control_pressed) { return 0; }
+            if(scoreMatrix[i][numRefFlows] < minRowScore){
+                minRowScore = scoreMatrix[i][numRefFlows];
+                minRowIndex = i;
+            }
+        }
+        
+        int minColumnIndex = numRefFlows;
+        double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
+        for(int i=0;i<numQueryFlows;i++){
+            if (m->control_pressed) { return 0; }
+            if(scoreMatrix[numQueryFlows][i] < minColumnScore){
+                minColumnScore = scoreMatrix[numQueryFlows][i];
+                minColumnIndex = i;
+            }
+        }
+        
+        
+        int i=minRowIndex;
+        int j= minColumnIndex;
+        
+        vector<double> newFlowgram;
+        vector<double> newRefFlowgram;
+        
+        while(i > 0 && j > 0){
+            if (m->control_pressed) { return 0; }
+            if(directMatrix[i][j] == 'd'){
+                newFlowgram.push_back(flowgram[i-1]);
+                newRefFlowgram.push_back(refFlow[j-1]);
+                
+                i--;
+                j--;
+            }
+            else if(directMatrix[i][j] == 'l'){
+                newFlowgram.push_back(0);
+                newRefFlowgram.push_back(refFlow[j-1]);
+                
+                j--;
+            }
+            else if(directMatrix[i][j] == 'u'){
+                newFlowgram.push_back(flowgram[i-1]);
+                newRefFlowgram.push_back(0);
+                
+                i--;
+            }
+        }
+        
+        flowgram = newFlowgram;
+        refFlow = newRefFlowgram;
+        
+        return 0;
+    
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "alignFlowGrams");
+               exit(1);
+       }
+}
+
+//******************************************************************************************************************************
+
+int MakeLookupCommand::regress(vector<double>& data, int N){
+    try {
+        //fit data for larger values of N
+        double xMean = 0;
+        double yMean = 0;
+        
+        for(int i=1;i<N;i++){
+            if (m->control_pressed) { return 0; }
+            xMean += i;
+            yMean += data[i];
+        }
+        xMean /= (N-1);
+        yMean /= (N-1);
+        
+        double numerator = 0;
+        double denomenator = 0;
+        for(int i=1;i<N;i++){
+            if (m->control_pressed) { return 0; }
+            numerator += (i-xMean)*(data[i] - yMean);
+            denomenator += (i-xMean) * (i-xMean);
+        }
+        double slope = numerator / denomenator;
+        double intercept = yMean - slope * xMean;
+        
+        for(int i=N;i<11;i++){
+            data[i] = intercept + i * slope;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "regress");
+               exit(1);
+       }
+}
+
+//******************************************************************************************************************************
+
+//**********************************************************************************************************************
+
+