2 // makelookupcommand.cpp
5 // Created by SarahsWork on 5/14/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "makelookupcommand.h"
11 //**********************************************************************************************************************
12 vector<string> MakeLookupCommand::setParameters(){
14 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
15 CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow);
16 CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors);
17 CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode);
18 CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey);
19 CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold);
20 CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
29 m->errorOut(e, "MakeLookupCommand", "setParameters");
33 //**********************************************************************************************************************
34 string MakeLookupCommand::getHelpString(){
36 string helpString = "";
37 helpString += "The make.lookup command allows you to create custom lookup files for use with shhh.flows.\n";
38 helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n";
39 helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n";
40 helpString += "The flow parameter is used to provide the flow data. It is required.\n";
41 helpString += "The error parameter is used to provide the error summary. It is required.\n";
42 helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
43 helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n";
44 helpString += "The threshold parameter is ....Default=10000.\n";
45 helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
46 helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
47 helpString += "new(...)\n";
51 m->errorOut(e, "MakeLookupCommand", "getHelpString");
55 //**********************************************************************************************************************
56 string MakeLookupCommand::getOutputPattern(string type) {
60 if (type == "lookup") { pattern = "[filename],lookup"; }
61 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
66 m->errorOut(e, "MakeLookupCommand", "getOutputPattern");
70 //**********************************************************************************************************************
71 MakeLookupCommand::MakeLookupCommand(){
73 abort = true; calledHelp = true;
75 vector<string> tempOutNames;
76 outputTypes["lookup"] = tempOutNames;
79 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
83 //**********************************************************************************************************************
84 MakeLookupCommand::MakeLookupCommand(string option) {
86 abort = false; calledHelp = false;
88 //allow user to run help
89 if(option == "help") { help(); abort = true; calledHelp = true; }
90 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
93 //valid paramters for this command
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
99 ValidParameters validParameter;
100 map<string,string>::iterator it;
101 //check to make sure all parameters are valid for command
102 for (it = parameters.begin(); it != parameters.end(); it++) {
103 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
106 vector<string> tempOutNames;
107 outputTypes["lookup"] = tempOutNames;
109 //if the user changes the input directory command factory will send this info to us in the output parameter
110 string inputDir = validParameter.validFile(parameters, "inputdir", false);
111 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("flow");
115 //user has given a template file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["flow"] = inputDir + it->second; }
122 it = parameters.find("error");
123 //user has given a template file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["error"] = inputDir + it->second; }
130 it = parameters.find("reference");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["reference"] = inputDir + it->second; }
140 //check for parameters
141 errorFileName = validParameter.validFile(parameters, "error", true);
142 if (errorFileName == "not open") { errorFileName = ""; abort = true; }
143 else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required."); m->mothurOutEndLine(); abort = true; }
145 flowFileName = validParameter.validFile(parameters, "flow", true);
146 if (flowFileName == "not open") { flowFileName = ""; abort = true; }
147 else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required."); m->mothurOutEndLine(); abort = true; }
148 else { m->setFlowFile(flowFileName); }
150 refFastaFileName = validParameter.validFile(parameters, "reference", true);
151 if (refFastaFileName == "not open") { abort = true; }
152 else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required."); m->mothurOutEndLine(); abort = true; }
154 //if the user changes the output directory command factory will send this info to us in the output parameter
155 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
156 outputDir = m->hasPath(flowFileName); //if user entered a file with a path then preserve it
159 string temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "10000"; }
160 m->mothurConvert(temp, thresholdCount);
162 barcodeSequence = validParameter.validFile(parameters, "barcode", false); if (barcodeSequence == "not found"){ barcodeSequence = "AACCGTGTC"; }
164 keySequence = validParameter.validFile(parameters, "key", false); if (keySequence == "not found"){ keySequence = "TCAG"; }
166 temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
167 if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
170 if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
171 else if(toupper(temp[0]) == 'B'){
172 flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
173 else if(toupper(temp[0]) == 'I'){
174 flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; }
176 m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
183 catch(exception& e) {
184 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
188 //**********************************************************************************************************************
190 int MakeLookupCommand::execute(){
193 if (abort == true) { if (calledHelp) { return 0; } return 2; }
195 cout.setf(ios::fixed, ios::floatfield);
196 cout.setf(ios::showpoint);
198 double gapOpening = 10;
200 vector<vector<double> > penaltyMatrix; penaltyMatrix.resize(maxHomoP);
201 for(int i=0;i<maxHomoP;i++){
202 penaltyMatrix[i].resize(maxHomoP, 5);
203 penaltyMatrix[i][i] = 0;
206 //Create flows for reference sequences...
208 m->openInputFile(refFastaFileName, refFASTA); // * open reference sequence file
209 map<string, vector<double> > refFlowgrams;
211 while(!refFASTA.eof()){
212 if (m->control_pressed) { refFASTA.close(); return 0; }
214 Sequence seq(refFASTA); m->gobble(refFASTA);
216 if (m->debug) { m->mothurOut("[DEBUG]: seq = " + seq.getName() + ".\n"); }
218 string fullSequence = keySequence + barcodeSequence + seq.getAligned(); // * concatenate the keySequence, barcodeSequence, and
219 // referenceSequences
220 refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); // * translate concatenated sequences into flowgram
224 vector<vector<double> > lookupTable; lookupTable.resize(1000);
225 for(int i=0;i<1000;i++){
226 lookupTable[i].resize(11, 0);
229 if (m->debug) { m->mothurOut("[DEBUG]: here .\n"); }
230 //Loop through each sequence in the flow file and the error summary file.
232 m->openInputFile(flowFileName, flowFile);
234 flowFile >> numFlows;
236 if (m->debug) { m->mothurOut("[DEBUG]: numflows = " + toString(numFlows) + ".\n"); }
239 m->openInputFile(errorFileName, errorFile);
240 m->getline(errorFile); //grab headers
242 string errorQuery, flowQuery, referenceName, dummy;
246 vector<double> std; std.resize(11, 0);
248 while(errorFile && flowFile){
250 if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
252 // * if it's chimeric, chuck it
253 errorFile >> errorQuery >> referenceName;
254 for(int i=2;i<40;i++){
257 errorFile >> chimera;
261 m->getline(flowFile);
265 flowFile >> flowQuery >> dummy;
266 if(flowQuery != errorQuery){ cout << flowQuery << " != " << errorQuery << endl; }
268 map<string, vector<double> >::iterator it = refFlowgrams.find(referenceName); // * compare sequence to its closest reference
269 if (it == refFlowgrams.end()) {
270 m->mothurOut("[WARNING]: missing reference flow " + referenceName + ", ignoring flow " + flowQuery + ".\n");
271 m->getline(flowFile); m->gobble(flowFile);
273 vector<double> refFlow = it->second;
274 vector<double> flowgram; flowgram.resize(numFlows);
276 if (m->debug) { m->mothurOut("[DEBUG]: flowQuery = " + flowQuery + ".\t" + "refName " + referenceName+ ".\n"); }
278 for(int i=0;i<numFlows;i++){
279 flowFile >> intensity;
280 flowgram[i] = intensity;// (int)round(100 * intensity);
284 if (m->debug) { m->mothurOut("[DEBUG]: before align.\n"); }
286 alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
288 if (m->debug) { m->mothurOut("[DEBUG]: after align.\n"); }
290 if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
292 for(int i=0;i<flowgram.size();i++){
293 int count = (int)round(100*flowgram[i]);
294 if(count > 1000){count = 999;}
295 if(abs(flowgram[i]-refFlow[i])<=0.50){
296 lookupTable[count][int(refFlow[i])]++; // * build table
297 std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
303 m->gobble(errorFile);
306 errorFile.close(); flowFile.close();
310 vector<int> counts; counts.resize(11, 0);
312 for(int i=0;i<1000;i++){
313 for(int j=0;j<11;j++){
314 counts[j] += lookupTable[i][j];
315 totalCount += lookupTable[i][j];
320 for(int i=0;i<11;i++){
321 if(counts[i] < thresholdCount){ N = i; break; } //bring back
322 std[i] = sqrt(std[i]/(double)(counts[i])); //bring back
325 regress(std, N); //bring back
327 if (m->control_pressed) { return 0; }
329 double minProbability = 0.1 / (double)totalCount;
331 //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low
332 double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5);
334 for(int i=0;i<1000;i++){
335 if (m->control_pressed) { return 0; }
337 for(int j=0;j<N;j++){
338 if(lookupTable[i][j] == 0){
339 lookupTable[i][j] = 1; //bring back
341 lookupTable[i][j] = -log(lookupTable[i][j]/double(counts[j])); //bring back
344 for(int j=N;j<11;j++){ //bring back
345 double normalProbability = 1.0/((double)std[j] * sqrtTwoPi) * exp(-double(i - j*100)* double(i - j*100)/ double(2*std[j]*std[j]));
346 if(normalProbability > minProbability){
347 lookupTable[i][j] = -log(normalProbability);
350 lookupTable[i][j] = -log(minProbability);
356 //calculate the probability of each homopolymer length
357 vector<double> negLogHomoProb; negLogHomoProb.resize(11, 0.00); //bring back
358 for(int i=0;i<N;i++){
359 negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
361 regress(negLogHomoProb, N);
363 if (m->control_pressed) { return 0; }
365 //output data table. column one is the probability of each homopolymer length
366 map<string, string> variables;
367 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
368 string outputFile = getOutputFileName("lookup",variables);
369 outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile);
372 m->openOutputFile(outputFile, lookupFile);
373 lookupFile.precision(8);
375 for(int j=0;j<11;j++){
376 // lookupFile << counts[j];
377 lookupFile << showpoint << negLogHomoProb[j]; //bring back
378 for(int i=0;i<1000;i++){
379 lookupFile << '\t' << lookupTable[i][j];
385 m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n");
387 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
389 m->mothurOutEndLine();
390 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
391 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
392 m->mothurOutEndLine();
396 catch(exception& e) {
397 m->errorOut(e, "MakeLookupCommand", "execute");
401 //******************************************************************************************************************************
403 vector<double> MakeLookupCommand::convertSeqToFlow(string sequence, string order){
405 int seqLength = (int)sequence.length();
406 int numFlows = (int)order.length();
407 vector<double> flowgram;
410 int sequenceIndex = 0;
412 while(orderIndex < numFlows && sequenceIndex < seqLength){
414 if (m->control_pressed) { return flowgram; }
416 int homopolymerLength = 1;
418 char base = sequence[sequenceIndex];
420 while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){
427 for(int i=orderIndex; i<orderIndex+numFlows;i++){
428 if(order[i%numFlows] == base){
429 //flowgram[i] = homopolymerLength;
430 orderIndex = i%numFlows;
432 }else { flowgram.push_back(0); }
435 //flowgram[orderIndex] = homopolymerLength;
436 flowgram.push_back(homopolymerLength);
439 orderIndex = orderIndex % numFlows;
444 catch(exception& e) {
445 m->errorOut(e, "MakeLookupCommand", "convertSeqToFlow");
449 //******************************************************************************************************************************
451 int MakeLookupCommand::alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder){
453 int numQueryFlows = (int)flowgram.size();
454 int numRefFlows = (int)refFlow.size();
456 //cout << numQueryFlows << '\t' << numRefFlows << endl;
458 vector<vector<double> > scoreMatrix; scoreMatrix.resize(numQueryFlows+1);
459 vector<vector<char> > directMatrix; directMatrix.resize(numQueryFlows+1);
461 for(int i=0;i<=numQueryFlows;i++){
462 if (m->control_pressed) { return 0; }
463 scoreMatrix[i].resize(numRefFlows+1, 0.00);
464 directMatrix[i].resize(numRefFlows+1, 'x');
466 scoreMatrix[i][0] = i * gapOpening;
467 directMatrix[i][0] = 'u';
470 //cout << numQueryFlows << '\t' << numRefFlows << endl;
473 for(int i=0;i<=numRefFlows;i++){
474 scoreMatrix[0][i] = i * gapOpening;
475 directMatrix[0][i] = 'l';
479 for(int i=1;i<=numQueryFlows;i++){
480 for(int j=1;j<=numRefFlows;j++){
481 if (m->control_pressed) { return 0; }
482 double diagonal = 1000000000;
483 if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){
484 diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]];
486 double up = scoreMatrix[i-1][j] + gapOpening;
487 double left = scoreMatrix[i][j-1] + gapOpening;
489 double minScore = diagonal;
490 char direction = 'd';
492 if(left < diagonal && left < up){
496 else if(up < diagonal && up < left){
501 scoreMatrix[i][j] = minScore;
502 directMatrix[i][j] = direction;
507 int minRowIndex = numQueryFlows;
508 double minRowScore = scoreMatrix[numQueryFlows][numRefFlows];
509 for(int i=0;i<numQueryFlows;i++){
510 if (m->control_pressed) { return 0; }
511 if(scoreMatrix[i][numRefFlows] < minRowScore){
512 minRowScore = scoreMatrix[i][numRefFlows];
517 int minColumnIndex = numRefFlows;
518 double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
519 for(int i=0;i<numRefFlows;i++){
520 if (m->control_pressed) { return 0; }
521 if(scoreMatrix[numQueryFlows][i] < minColumnScore){
522 minColumnScore = scoreMatrix[numQueryFlows][i];
529 int j= minColumnIndex;
531 vector<double> newFlowgram;
532 vector<double> newRefFlowgram;
534 while(i > 0 && j > 0){
535 if (m->control_pressed) { return 0; }
536 if(directMatrix[i][j] == 'd'){
537 newFlowgram.push_back(flowgram[i-1]);
538 newRefFlowgram.push_back(refFlow[j-1]);
543 else if(directMatrix[i][j] == 'l'){
544 newFlowgram.push_back(0);
545 newRefFlowgram.push_back(refFlow[j-1]);
549 else if(directMatrix[i][j] == 'u'){
550 newFlowgram.push_back(flowgram[i-1]);
551 newRefFlowgram.push_back(0);
557 flowgram = newFlowgram;
558 refFlow = newRefFlowgram;
563 catch(exception& e) {
564 m->errorOut(e, "MakeLookupCommand", "alignFlowGrams");
569 //******************************************************************************************************************************
571 int MakeLookupCommand::regress(vector<double>& data, int N){
573 //fit data for larger values of N
577 for(int i=1;i<N;i++){
578 if (m->control_pressed) { return 0; }
585 double numerator = 0;
586 double denomenator = 0;
587 for(int i=1;i<N;i++){
588 if (m->control_pressed) { return 0; }
589 numerator += (i-xMean)*(data[i] - yMean);
590 denomenator += (i-xMean) * (i-xMean);
592 double slope = numerator / denomenator;
593 double intercept = yMean - slope * xMean;
595 for(int i=N;i<11;i++){
596 data[i] = intercept + i * slope;
601 catch(exception& e) {
602 m->errorOut(e, "MakeLookupCommand", "regress");
607 //******************************************************************************************************************************
609 //**********************************************************************************************************************