]> git.donarmstrong.com Git - mothur.git/blob - makelookupcommand.cpp
added modify names parameter to set.dir
[mothur.git] / makelookupcommand.cpp
1 //
2 //  makelookupcommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 5/14/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "makelookupcommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeLookupCommand::setParameters(){
13         try {
14                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
15         CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow);
16         CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors);
17         CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode);
18                 CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey);
19         CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold);
20         CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "MakeLookupCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string MakeLookupCommand::getHelpString(){
35         try {
36                 string helpString = "";
37                 helpString += "The make.lookup command allows you to create custom lookup files for use with shhh.flows.\n";
38                 helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n";
39                 helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n";
40         helpString += "The flow parameter is used to provide the flow data. It is required.\n";
41         helpString += "The error parameter is used to provide the error summary. It is required.\n";
42         helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
43         helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n";
44         helpString += "The threshold parameter is ....Default=10000.\n";
45         helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
46                 helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
47                 helpString += "new(...)\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "MakeLookupCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string MakeLookupCommand::getOutputPattern(string type) {
57     try {
58         string pattern = "";
59         
60         if (type == "lookup") {  pattern = "[filename],lookup"; }
61         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
62         
63         return pattern;
64     }
65     catch(exception& e) {
66         m->errorOut(e, "MakeLookupCommand", "getOutputPattern");
67         exit(1);
68     }
69 }
70 //**********************************************************************************************************************
71 MakeLookupCommand::MakeLookupCommand(){
72         try {
73                 abort = true; calledHelp = true;
74                 setParameters();
75         vector<string> tempOutNames;
76                 outputTypes["lookup"] = tempOutNames; 
77     }
78         catch(exception& e) {
79                 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 MakeLookupCommand::MakeLookupCommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;
87                 
88                 //allow user to run help
89                 if(option == "help") { help(); abort = true; calledHelp = true; }
90                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91                 
92                 else {
93                         //valid paramters for this command
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         map<string,string>::iterator it;
101                         //check to make sure all parameters are valid for command
102                         for (it = parameters.begin(); it != parameters.end(); it++) {
103                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
104                         }
105                         
106             vector<string> tempOutNames;
107             outputTypes["lookup"] = tempOutNames; 
108                         
109                         //if the user changes the input directory command factory will send this info to us in the output parameter
110                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
111                         if (inputDir == "not found"){   inputDir = "";          }
112                         else {
113                 string path;
114                                 it = parameters.find("flow");
115                                 //user has given a template file
116                                 if(it != parameters.end()){
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
120                                 }
121                                 
122                                 it = parameters.find("error");
123                                 //user has given a template file
124                                 if(it != parameters.end()){
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["error"] = inputDir + it->second;            }
128                                 }
129                                 
130                                 it = parameters.find("reference");
131                                 //user has given a template file
132                                 if(it != parameters.end()){
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
136                                 }
137                 
138             }
139                         
140                         //check for parameters
141             errorFileName = validParameter.validFile(parameters, "error", true);
142                         if (errorFileName == "not open") { errorFileName = ""; abort = true; }
143                         else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required."); m->mothurOutEndLine();  abort = true; }
144                         
145                         flowFileName = validParameter.validFile(parameters, "flow", true);
146                         if (flowFileName == "not open") { flowFileName = ""; abort = true; }
147                         else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required."); m->mothurOutEndLine();  abort = true; }
148                         else {   m->setFlowFile(flowFileName);  }
149                         
150                         refFastaFileName = validParameter.validFile(parameters, "reference", true);
151                         if (refFastaFileName == "not open") { abort = true; }
152                         else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required."); m->mothurOutEndLine();  abort = true; }
153                       
154             //if the user changes the output directory command factory will send this info to us in the output parameter
155                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
156                                 outputDir = m->hasPath(flowFileName); //if user entered a file with a path then preserve it
157                         }
158             
159             string temp = validParameter.validFile(parameters, "threshold", false);     if (temp == "not found"){       temp = "10000"; }
160                         m->mothurConvert(temp, thresholdCount);
161             
162             barcodeSequence = validParameter.validFile(parameters, "barcode", false);   if (barcodeSequence == "not found"){    barcodeSequence = "AACCGTGTC";  }
163             
164             keySequence = validParameter.validFile(parameters, "key", false);   if (keySequence == "not found"){        keySequence = "TCAG";   }
165             
166             temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){     temp = "A";     }
167             if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
168             }
169             else {
170                 if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
171                 else if(toupper(temp[0]) == 'B'){
172                     flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
173                 else if(toupper(temp[0]) == 'I'){
174                     flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
175                 else {
176                     m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
177                 }
178             }
179                         
180                 }
181                 
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
185                 exit(1);
186         }
187 }
188 //**********************************************************************************************************************
189
190 int MakeLookupCommand::execute(){
191         try {
192         
193                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
194         
195         cout.setf(ios::fixed, ios::floatfield);
196         cout.setf(ios::showpoint);
197         
198         double gapOpening = 10;
199         int maxHomoP = 101;
200         vector<vector<double> > penaltyMatrix(maxHomoP);
201         for(int i=0;i<maxHomoP;i++){
202             penaltyMatrix[i].resize(maxHomoP, 5);
203             penaltyMatrix[i][i] = 0;
204         }
205         
206         //Create flows for reference sequences...
207         ifstream refFASTA;
208         m->openInputFile(refFastaFileName, refFASTA);                        //  *   open reference sequence file
209         map<string, vector<double> > refFlowgrams;
210         
211         while(!refFASTA.eof()){
212             if (m->control_pressed) { refFASTA.close(); return 0; }
213             Sequence seq(refFASTA);  m->gobble(refFASTA);
214             
215             string fullSequence = keySequence + barcodeSequence + seq.getAligned(); //  *   concatenate the keySequence, barcodeSequence, and
216             //      referenceSequences
217             refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); //  *   translate concatenated sequences into flowgram
218         }
219         refFASTA.close();
220         
221         vector<vector<double> > lookupTable(1000);
222         for(int i=0;i<1000;i++){
223             lookupTable[i].resize(11, 0);
224         }
225         
226         
227         //Loop through each sequence in the flow file and the error summary file.
228         ifstream flowFile;
229         m->openInputFile(flowFileName, flowFile);
230         int numFlows;
231         flowFile >> numFlows;
232         
233         ifstream errorFile;
234         m->openInputFile(errorFileName, errorFile);
235         m->getline(errorFile); //grab headers
236         
237         string errorQuery, flowQuery, referenceName, dummy;
238         string chimera;
239         float intensity;
240         
241         vector<double> std(11, 0);
242         
243         while(errorFile && flowFile){
244             
245             if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
246             
247             //  * if it's chimeric, chuck it
248             errorFile >> errorQuery >> referenceName;
249             for(int i=2;i<40;i++){
250                 errorFile >> dummy;
251             }
252             errorFile >> chimera;
253            
254             
255             if(chimera == "2"){
256                 m->getline(flowFile);
257             }
258             else{
259                 
260                 flowFile >> flowQuery >> dummy;
261                 if(flowQuery != errorQuery){    cout << flowQuery << " != " << errorQuery << endl;  }
262                 
263                 vector<double> refFlow = refFlowgrams[referenceName];       //  * compare sequence to its closest reference
264                 
265                 vector<double> flowgram(numFlows);
266                 
267                 for(int i=0;i<numFlows;i++){
268                     flowFile >> intensity;
269                     flowgram[i] = intensity;// (int)round(100 * intensity);
270                 }
271                 m->gobble(flowFile);
272                 
273                 alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
274                 
275                 if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
276                 
277                 for(int i=0;i<flowgram.size();i++){
278                     int count = (int)round(100*flowgram[i]);
279                     if(count > 1000){count = 999;}
280                     if(abs(flowgram[i]-refFlow[i])<=0.50){
281                         lookupTable[count][int(refFlow[i])]++;               //  * build table
282                         std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
283                     }
284                 }
285                 
286             }
287             m->gobble(errorFile);
288             m->gobble(flowFile);
289         }
290         errorFile.close(); flowFile.close();
291         
292         //get probabilities
293         vector<int> counts(11, 0);
294         int totalCount = 0;
295         for(int i=0;i<1000;i++){
296             for(int j=0;j<11;j++){
297                 counts[j] += lookupTable[i][j];
298                 totalCount += lookupTable[i][j];
299             }
300         }
301         
302         int N = 11;
303         for(int i=0;i<11;i++){
304             if(counts[i] < thresholdCount){ N = i; break; }  //bring back
305             std[i] = sqrt(std[i]/(double)(counts[i]));  //bring back
306         }
307         
308         regress(std, N);  //bring back
309         
310         if (m->control_pressed) { return 0; }
311         
312         double minProbability = 0.1 / (double)totalCount;
313         
314         //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low
315         double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5);
316         
317         for(int i=0;i<1000;i++){
318             if (m->control_pressed) { return 0; }
319             
320             for(int j=0;j<N;j++){
321                 if(lookupTable[i][j] == 0){
322                     lookupTable[i][j] = 1;  //bring back
323                 }
324                 lookupTable[i][j] = -log(lookupTable[i][j]/double(counts[j]));  //bring back
325             }
326             
327             for(int j=N;j<11;j++){  //bring back
328                 double normalProbability = 1.0/((double)std[j] * sqrtTwoPi) * exp(-double(i - j*100)* double(i - j*100)/ double(2*std[j]*std[j]));
329                 if(normalProbability > minProbability){
330                     lookupTable[i][j] = -log(normalProbability);
331                 }
332                 else{
333                     lookupTable[i][j] = -log(minProbability);
334                 }
335             }
336         }
337         
338         
339         //calculate the probability of each homopolymer length
340         vector<double> negLogHomoProb(11, 0.00);  //bring back
341         for(int i=0;i<N;i++){
342             negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
343         }
344         regress(negLogHomoProb, N);
345         
346         if (m->control_pressed) { return 0; }
347         
348         //output data table.  column one is the probability of each homopolymer length
349         map<string, string> variables;
350         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
351                 string outputFile = getOutputFileName("lookup",variables);
352                 outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile);
353         
354         ofstream lookupFile;
355         m->openOutputFile(outputFile, lookupFile);
356         lookupFile.precision(8);
357         
358         for(int j=0;j<11;j++){
359             //        lookupFile << counts[j];
360             lookupFile << showpoint << negLogHomoProb[j]; //bring back
361             for(int i=0;i<1000;i++){
362                 lookupFile << '\t' << lookupTable[i][j];
363             }
364             lookupFile << endl;
365         }
366         lookupFile.close();
367         
368         m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n");
369          
370         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
371                 
372                 m->mothurOutEndLine();
373                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
374                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
375                 m->mothurOutEndLine();
376         
377         return 0;
378     }
379         catch(exception& e) {
380                 m->errorOut(e, "MakeLookupCommand", "execute");
381                 exit(1);
382         }
383 }
384 //******************************************************************************************************************************
385
386 vector<double> MakeLookupCommand::convertSeqToFlow(string sequence, string order){
387     try {
388         int seqLength = (int)sequence.length();
389         int numFlows = (int)order.length();
390         vector<double> flowgram;
391         
392         int orderIndex = 0;
393         int sequenceIndex = 0;
394         
395         while(orderIndex < numFlows && sequenceIndex < seqLength){
396             
397             if (m->control_pressed) { return flowgram; }
398             
399             int homopolymerLength = 1;
400             
401             char base = sequence[sequenceIndex];
402             
403             while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){
404                 homopolymerLength++;
405                 sequenceIndex++;
406             }
407             
408             sequenceIndex++;
409             
410             for(int i=orderIndex; i<orderIndex+numFlows;i++){
411                 if(order[i%numFlows] == base){
412                     //flowgram[i] = homopolymerLength;
413                     orderIndex = i%numFlows;
414                     break;
415                 }else {  flowgram.push_back(0); }
416             }
417             
418             //flowgram[orderIndex] = homopolymerLength;
419             flowgram.push_back(homopolymerLength);
420             
421             orderIndex++;
422             orderIndex = orderIndex % numFlows;
423         }
424         
425         return flowgram;
426     }
427         catch(exception& e) {
428                 m->errorOut(e, "MakeLookupCommand", "convertSeqToFlow");
429                 exit(1);
430         }
431 }
432 //******************************************************************************************************************************
433
434 int MakeLookupCommand::alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder){
435     try {
436         int numQueryFlows = (int)flowgram.size();
437         int numRefFlows = (int)refFlow.size();
438         
439             //cout << numQueryFlows << '\t' << numRefFlows << endl;
440         
441         vector<vector<double> > scoreMatrix(numQueryFlows+1);
442         vector<vector<char> > directMatrix(numQueryFlows+1);
443         
444         for(int i=0;i<=numQueryFlows;i++){
445             if (m->control_pressed) { return 0; }
446             scoreMatrix[i].resize(numRefFlows+1, 0.00);
447             directMatrix[i].resize(numRefFlows+1, 'x');
448             
449             scoreMatrix[i][0] = i * gapOpening;
450             directMatrix[i][0] = 'u';
451         }
452         
453             //cout << numQueryFlows << '\t' << numRefFlows << endl;
454         
455         
456         for(int i=0;i<=numRefFlows;i++){
457             scoreMatrix[0][i] = i * gapOpening;
458             directMatrix[0][i] = 'l';
459         }
460         
461         for(int i=1;i<=numQueryFlows;i++){
462             for(int j=1;j<=numRefFlows;j++){
463                 if (m->control_pressed) { return 0; }
464                 double diagonal = 1000000000;
465                 if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){
466                     diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]];
467                 }
468                 double up = scoreMatrix[i-1][j] + gapOpening;
469                 double left = scoreMatrix[i][j-1] + gapOpening;
470                 
471                 double minScore = diagonal;
472                 char direction = 'd';
473                 
474                 if(left < diagonal && left < up){
475                     minScore = left;
476                     direction = 'l';
477                 }
478                 else if(up < diagonal && up < left){
479                     minScore = up;
480                     direction = 'u';
481                 }
482                 
483                 scoreMatrix[i][j] = minScore;
484                 directMatrix[i][j] = direction;
485                 
486             }
487         }
488         
489         int minRowIndex = numQueryFlows;
490         double minRowScore = scoreMatrix[numQueryFlows][numRefFlows];
491         for(int i=0;i<numQueryFlows;i++){
492             if (m->control_pressed) { return 0; }
493             if(scoreMatrix[i][numRefFlows] < minRowScore){
494                 minRowScore = scoreMatrix[i][numRefFlows];
495                 minRowIndex = i;
496             }
497         }
498         
499         int minColumnIndex = numRefFlows;
500         double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
501         for(int i=0;i<numQueryFlows;i++){
502             if (m->control_pressed) { return 0; }
503             if(scoreMatrix[numQueryFlows][i] < minColumnScore){
504                 minColumnScore = scoreMatrix[numQueryFlows][i];
505                 minColumnIndex = i;
506             }
507         }
508         
509         
510         int i=minRowIndex;
511         int j= minColumnIndex;
512         
513         vector<double> newFlowgram;
514         vector<double> newRefFlowgram;
515         
516         while(i > 0 && j > 0){
517             if (m->control_pressed) { return 0; }
518             if(directMatrix[i][j] == 'd'){
519                 newFlowgram.push_back(flowgram[i-1]);
520                 newRefFlowgram.push_back(refFlow[j-1]);
521                 
522                 i--;
523                 j--;
524             }
525             else if(directMatrix[i][j] == 'l'){
526                 newFlowgram.push_back(0);
527                 newRefFlowgram.push_back(refFlow[j-1]);
528                 
529                 j--;
530             }
531             else if(directMatrix[i][j] == 'u'){
532                 newFlowgram.push_back(flowgram[i-1]);
533                 newRefFlowgram.push_back(0);
534                 
535                 i--;
536             }
537         }
538         
539         flowgram = newFlowgram;
540         refFlow = newRefFlowgram;
541         
542         return 0;
543     
544     }
545         catch(exception& e) {
546                 m->errorOut(e, "MakeLookupCommand", "alignFlowGrams");
547                 exit(1);
548         }
549 }
550
551 //******************************************************************************************************************************
552
553 int MakeLookupCommand::regress(vector<double>& data, int N){
554     try {
555         //fit data for larger values of N
556         double xMean = 0;
557         double yMean = 0;
558         
559         for(int i=1;i<N;i++){
560             if (m->control_pressed) { return 0; }
561             xMean += i;
562             yMean += data[i];
563         }
564         xMean /= (N-1);
565         yMean /= (N-1);
566         
567         double numerator = 0;
568         double denomenator = 0;
569         for(int i=1;i<N;i++){
570             if (m->control_pressed) { return 0; }
571             numerator += (i-xMean)*(data[i] - yMean);
572             denomenator += (i-xMean) * (i-xMean);
573         }
574         double slope = numerator / denomenator;
575         double intercept = yMean - slope * xMean;
576         
577         for(int i=N;i<11;i++){
578             data[i] = intercept + i * slope;
579         }
580         
581         return 0;
582     }
583         catch(exception& e) {
584                 m->errorOut(e, "MakeLookupCommand", "regress");
585                 exit(1);
586         }
587 }
588
589 //******************************************************************************************************************************
590
591 //**********************************************************************************************************************
592
593