2 // makelookupcommand.cpp
5 // Created by SarahsWork on 5/14/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "makelookupcommand.h"
11 //**********************************************************************************************************************
12 vector<string> MakeLookupCommand::setParameters(){
14 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
15 CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow);
16 CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors);
17 CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode);
18 CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey);
19 CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold);
20 CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
29 m->errorOut(e, "MakeLookupCommand", "setParameters");
33 //**********************************************************************************************************************
34 string MakeLookupCommand::getHelpString(){
36 string helpString = "";
37 helpString += "The make.lookup command allows you to create custom lookup files for use with shhh.flows.\n";
38 helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n";
39 helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n";
40 helpString += "The flow parameter is used to provide the flow data. It is required.\n";
41 helpString += "The error parameter is used to provide the error summary. It is required.\n";
42 helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
43 helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n";
44 helpString += "The threshold parameter is ....Default=10000.\n";
45 helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
46 helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
47 helpString += "new(...)\n";
51 m->errorOut(e, "MakeLookupCommand", "getHelpString");
55 //**********************************************************************************************************************
56 string MakeLookupCommand::getOutputPattern(string type) {
60 if (type == "lookup") { pattern = "[filename],lookup"; }
61 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
66 m->errorOut(e, "MakeLookupCommand", "getOutputPattern");
70 //**********************************************************************************************************************
71 MakeLookupCommand::MakeLookupCommand(){
73 abort = true; calledHelp = true;
75 vector<string> tempOutNames;
76 outputTypes["lookup"] = tempOutNames;
79 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
83 //**********************************************************************************************************************
84 MakeLookupCommand::MakeLookupCommand(string option) {
86 abort = false; calledHelp = false;
88 //allow user to run help
89 if(option == "help") { help(); abort = true; calledHelp = true; }
90 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
93 //valid paramters for this command
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
99 ValidParameters validParameter;
100 map<string,string>::iterator it;
101 //check to make sure all parameters are valid for command
102 for (it = parameters.begin(); it != parameters.end(); it++) {
103 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
106 vector<string> tempOutNames;
107 outputTypes["lookup"] = tempOutNames;
109 //if the user changes the input directory command factory will send this info to us in the output parameter
110 string inputDir = validParameter.validFile(parameters, "inputdir", false);
111 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("flow");
115 //user has given a template file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["flow"] = inputDir + it->second; }
122 it = parameters.find("error");
123 //user has given a template file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["error"] = inputDir + it->second; }
130 it = parameters.find("reference");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["reference"] = inputDir + it->second; }
140 //check for parameters
141 errorFileName = validParameter.validFile(parameters, "error", true);
142 if (errorFileName == "not open") { errorFileName = ""; abort = true; }
143 else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required."); m->mothurOutEndLine(); abort = true; }
145 flowFileName = validParameter.validFile(parameters, "flow", true);
146 if (flowFileName == "not open") { flowFileName = ""; abort = true; }
147 else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required."); m->mothurOutEndLine(); abort = true; }
148 else { m->setFlowFile(flowFileName); }
150 refFastaFileName = validParameter.validFile(parameters, "reference", true);
151 if (refFastaFileName == "not open") { abort = true; }
152 else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required."); m->mothurOutEndLine(); abort = true; }
154 //if the user changes the output directory command factory will send this info to us in the output parameter
155 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
156 outputDir = m->hasPath(flowFileName); //if user entered a file with a path then preserve it
159 string temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "10000"; }
160 m->mothurConvert(temp, thresholdCount);
162 barcodeSequence = validParameter.validFile(parameters, "barcode", false); if (barcodeSequence == "not found"){ barcodeSequence = "AACCGTGTC"; }
164 keySequence = validParameter.validFile(parameters, "key", false); if (keySequence == "not found"){ keySequence = "TCAG"; }
166 temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
167 if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
170 if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
171 else if(toupper(temp[0]) == 'B'){
172 flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
173 else if(toupper(temp[0]) == 'I'){
174 flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; }
176 m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
183 catch(exception& e) {
184 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
188 //**********************************************************************************************************************
190 int MakeLookupCommand::execute(){
193 if (abort == true) { if (calledHelp) { return 0; } return 2; }
195 cout.setf(ios::fixed, ios::floatfield);
196 cout.setf(ios::showpoint);
198 double gapOpening = 10;
200 vector<vector<double> > penaltyMatrix(maxHomoP);
201 for(int i=0;i<maxHomoP;i++){
202 penaltyMatrix[i].resize(maxHomoP, 5);
203 penaltyMatrix[i][i] = 0;
206 //Create flows for reference sequences...
208 m->openInputFile(refFastaFileName, refFASTA); // * open reference sequence file
209 map<string, vector<double> > refFlowgrams;
211 while(!refFASTA.eof()){
212 if (m->control_pressed) { refFASTA.close(); return 0; }
213 Sequence seq(refFASTA); m->gobble(refFASTA);
215 string fullSequence = keySequence + barcodeSequence + seq.getAligned(); // * concatenate the keySequence, barcodeSequence, and
216 // referenceSequences
217 refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); // * translate concatenated sequences into flowgram
221 vector<vector<double> > lookupTable(1000);
222 for(int i=0;i<1000;i++){
223 lookupTable[i].resize(11, 0);
227 //Loop through each sequence in the flow file and the error summary file.
229 m->openInputFile(flowFileName, flowFile);
231 flowFile >> numFlows;
234 m->openInputFile(errorFileName, errorFile);
235 m->getline(errorFile); //grab headers
237 string errorQuery, flowQuery, referenceName, dummy;
241 vector<double> std(11, 0);
243 while(errorFile && flowFile){
245 if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
247 // * if it's chimeric, chuck it
248 errorFile >> errorQuery >> referenceName;
249 for(int i=2;i<40;i++){
252 errorFile >> chimera;
256 m->getline(flowFile);
260 flowFile >> flowQuery >> dummy;
261 if(flowQuery != errorQuery){ cout << flowQuery << " != " << errorQuery << endl; }
263 vector<double> refFlow = refFlowgrams[referenceName]; // * compare sequence to its closest reference
265 vector<double> flowgram(numFlows);
267 for(int i=0;i<numFlows;i++){
268 flowFile >> intensity;
269 flowgram[i] = intensity;// (int)round(100 * intensity);
273 alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
275 if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
277 for(int i=0;i<flowgram.size();i++){
278 int count = (int)round(100*flowgram[i]);
279 if(count > 1000){count = 999;}
280 if(abs(flowgram[i]-refFlow[i])<=0.50){
281 lookupTable[count][int(refFlow[i])]++; // * build table
282 std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
287 m->gobble(errorFile);
290 errorFile.close(); flowFile.close();
293 vector<int> counts(11, 0);
295 for(int i=0;i<1000;i++){
296 for(int j=0;j<11;j++){
297 counts[j] += lookupTable[i][j];
298 totalCount += lookupTable[i][j];
303 for(int i=0;i<11;i++){
304 if(counts[i] < thresholdCount){ N = i; break; } //bring back
305 std[i] = sqrt(std[i]/(double)(counts[i])); //bring back
308 regress(std, N); //bring back
310 if (m->control_pressed) { return 0; }
312 double minProbability = 0.1 / (double)totalCount;
314 //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low
315 double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5);
317 for(int i=0;i<1000;i++){
318 if (m->control_pressed) { return 0; }
320 for(int j=0;j<N;j++){
321 if(lookupTable[i][j] == 0){
322 lookupTable[i][j] = 1; //bring back
324 lookupTable[i][j] = -log(lookupTable[i][j]/double(counts[j])); //bring back
327 for(int j=N;j<11;j++){ //bring back
328 double normalProbability = 1.0/((double)std[j] * sqrtTwoPi) * exp(-double(i - j*100)* double(i - j*100)/ double(2*std[j]*std[j]));
329 if(normalProbability > minProbability){
330 lookupTable[i][j] = -log(normalProbability);
333 lookupTable[i][j] = -log(minProbability);
339 //calculate the probability of each homopolymer length
340 vector<double> negLogHomoProb(11, 0.00); //bring back
341 for(int i=0;i<N;i++){
342 negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
344 regress(negLogHomoProb, N);
346 if (m->control_pressed) { return 0; }
348 //output data table. column one is the probability of each homopolymer length
349 map<string, string> variables;
350 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
351 string outputFile = getOutputFileName("lookup",variables);
352 outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile);
355 m->openOutputFile(outputFile, lookupFile);
356 lookupFile.precision(8);
358 for(int j=0;j<11;j++){
359 // lookupFile << counts[j];
360 lookupFile << showpoint << negLogHomoProb[j]; //bring back
361 for(int i=0;i<1000;i++){
362 lookupFile << '\t' << lookupTable[i][j];
368 m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n");
370 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
372 m->mothurOutEndLine();
373 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
374 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
375 m->mothurOutEndLine();
379 catch(exception& e) {
380 m->errorOut(e, "MakeLookupCommand", "execute");
384 //******************************************************************************************************************************
386 vector<double> MakeLookupCommand::convertSeqToFlow(string sequence, string order){
388 int seqLength = (int)sequence.length();
389 int numFlows = (int)order.length();
390 vector<double> flowgram;
393 int sequenceIndex = 0;
395 while(orderIndex < numFlows && sequenceIndex < seqLength){
397 if (m->control_pressed) { return flowgram; }
399 int homopolymerLength = 1;
401 char base = sequence[sequenceIndex];
403 while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){
410 for(int i=orderIndex; i<orderIndex+numFlows;i++){
411 if(order[i%numFlows] == base){
412 //flowgram[i] = homopolymerLength;
413 orderIndex = i%numFlows;
415 }else { flowgram.push_back(0); }
418 //flowgram[orderIndex] = homopolymerLength;
419 flowgram.push_back(homopolymerLength);
422 orderIndex = orderIndex % numFlows;
427 catch(exception& e) {
428 m->errorOut(e, "MakeLookupCommand", "convertSeqToFlow");
432 //******************************************************************************************************************************
434 int MakeLookupCommand::alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder){
436 int numQueryFlows = (int)flowgram.size();
437 int numRefFlows = (int)refFlow.size();
439 //cout << numQueryFlows << '\t' << numRefFlows << endl;
441 vector<vector<double> > scoreMatrix(numQueryFlows+1);
442 vector<vector<char> > directMatrix(numQueryFlows+1);
444 for(int i=0;i<=numQueryFlows;i++){
445 if (m->control_pressed) { return 0; }
446 scoreMatrix[i].resize(numRefFlows+1, 0.00);
447 directMatrix[i].resize(numRefFlows+1, 'x');
449 scoreMatrix[i][0] = i * gapOpening;
450 directMatrix[i][0] = 'u';
453 //cout << numQueryFlows << '\t' << numRefFlows << endl;
456 for(int i=0;i<=numRefFlows;i++){
457 scoreMatrix[0][i] = i * gapOpening;
458 directMatrix[0][i] = 'l';
461 for(int i=1;i<=numQueryFlows;i++){
462 for(int j=1;j<=numRefFlows;j++){
463 if (m->control_pressed) { return 0; }
464 double diagonal = 1000000000;
465 if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){
466 diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]];
468 double up = scoreMatrix[i-1][j] + gapOpening;
469 double left = scoreMatrix[i][j-1] + gapOpening;
471 double minScore = diagonal;
472 char direction = 'd';
474 if(left < diagonal && left < up){
478 else if(up < diagonal && up < left){
483 scoreMatrix[i][j] = minScore;
484 directMatrix[i][j] = direction;
489 int minRowIndex = numQueryFlows;
490 double minRowScore = scoreMatrix[numQueryFlows][numRefFlows];
491 for(int i=0;i<numQueryFlows;i++){
492 if (m->control_pressed) { return 0; }
493 if(scoreMatrix[i][numRefFlows] < minRowScore){
494 minRowScore = scoreMatrix[i][numRefFlows];
499 int minColumnIndex = numRefFlows;
500 double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
501 for(int i=0;i<numQueryFlows;i++){
502 if (m->control_pressed) { return 0; }
503 if(scoreMatrix[numQueryFlows][i] < minColumnScore){
504 minColumnScore = scoreMatrix[numQueryFlows][i];
511 int j= minColumnIndex;
513 vector<double> newFlowgram;
514 vector<double> newRefFlowgram;
516 while(i > 0 && j > 0){
517 if (m->control_pressed) { return 0; }
518 if(directMatrix[i][j] == 'd'){
519 newFlowgram.push_back(flowgram[i-1]);
520 newRefFlowgram.push_back(refFlow[j-1]);
525 else if(directMatrix[i][j] == 'l'){
526 newFlowgram.push_back(0);
527 newRefFlowgram.push_back(refFlow[j-1]);
531 else if(directMatrix[i][j] == 'u'){
532 newFlowgram.push_back(flowgram[i-1]);
533 newRefFlowgram.push_back(0);
539 flowgram = newFlowgram;
540 refFlow = newRefFlowgram;
545 catch(exception& e) {
546 m->errorOut(e, "MakeLookupCommand", "alignFlowGrams");
551 //******************************************************************************************************************************
553 int MakeLookupCommand::regress(vector<double>& data, int N){
555 //fit data for larger values of N
559 for(int i=1;i<N;i++){
560 if (m->control_pressed) { return 0; }
567 double numerator = 0;
568 double denomenator = 0;
569 for(int i=1;i<N;i++){
570 if (m->control_pressed) { return 0; }
571 numerator += (i-xMean)*(data[i] - yMean);
572 denomenator += (i-xMean) * (i-xMean);
574 double slope = numerator / denomenator;
575 double intercept = yMean - slope * xMean;
577 for(int i=N;i<11;i++){
578 data[i] = intercept + i * slope;
583 catch(exception& e) {
584 m->errorOut(e, "MakeLookupCommand", "regress");
589 //******************************************************************************************************************************
591 //**********************************************************************************************************************