]> git.donarmstrong.com Git - mothur.git/blob - makelookupcommand.cpp
fixes while testing 1.33.0
[mothur.git] / makelookupcommand.cpp
1 //
2 //  makelookupcommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 5/14/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "makelookupcommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeLookupCommand::setParameters(){
13         try {
14                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
15         CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow);
16         CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors);
17         CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode);
18                 CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey);
19         CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold);
20         CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "MakeLookupCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string MakeLookupCommand::getHelpString(){
35         try {
36                 string helpString = "";
37                 helpString += "The make.lookup command allows you to create custom lookup files for use with shhh.flows.\n";
38                 helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n";
39                 helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n";
40         helpString += "The flow parameter is used to provide the flow data. It is required.\n";
41         helpString += "The error parameter is used to provide the error summary. It is required.\n";
42         helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
43         helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n";
44         helpString += "The threshold parameter is ....Default=10000.\n";
45         helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
46                 helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
47                 helpString += "new(...)\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "MakeLookupCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string MakeLookupCommand::getOutputPattern(string type) {
57     try {
58         string pattern = "";
59         
60         if (type == "lookup") {  pattern = "[filename],lookup"; }
61         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
62         
63         return pattern;
64     }
65     catch(exception& e) {
66         m->errorOut(e, "MakeLookupCommand", "getOutputPattern");
67         exit(1);
68     }
69 }
70 //**********************************************************************************************************************
71 MakeLookupCommand::MakeLookupCommand(){
72         try {
73                 abort = true; calledHelp = true;
74                 setParameters();
75         vector<string> tempOutNames;
76                 outputTypes["lookup"] = tempOutNames; 
77     }
78         catch(exception& e) {
79                 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 MakeLookupCommand::MakeLookupCommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;
87                 
88                 //allow user to run help
89                 if(option == "help") { help(); abort = true; calledHelp = true; }
90                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91                 
92                 else {
93                         //valid paramters for this command
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         map<string,string>::iterator it;
101                         //check to make sure all parameters are valid for command
102                         for (it = parameters.begin(); it != parameters.end(); it++) {
103                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
104                         }
105                         
106             vector<string> tempOutNames;
107             outputTypes["lookup"] = tempOutNames; 
108                         
109                         //if the user changes the input directory command factory will send this info to us in the output parameter
110                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
111                         if (inputDir == "not found"){   inputDir = "";          }
112                         else {
113                 string path;
114                                 it = parameters.find("flow");
115                                 //user has given a template file
116                                 if(it != parameters.end()){
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
120                                 }
121                                 
122                                 it = parameters.find("error");
123                                 //user has given a template file
124                                 if(it != parameters.end()){
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["error"] = inputDir + it->second;            }
128                                 }
129                                 
130                                 it = parameters.find("reference");
131                                 //user has given a template file
132                                 if(it != parameters.end()){
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
136                                 }
137                 
138             }
139                         
140                         //check for parameters
141             errorFileName = validParameter.validFile(parameters, "error", true);
142                         if (errorFileName == "not open") { errorFileName = ""; abort = true; }
143                         else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required."); m->mothurOutEndLine();  abort = true; }
144                         
145                         flowFileName = validParameter.validFile(parameters, "flow", true);
146                         if (flowFileName == "not open") { flowFileName = ""; abort = true; }
147                         else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required."); m->mothurOutEndLine();  abort = true; }
148                         else {   m->setFlowFile(flowFileName);  }
149                         
150                         refFastaFileName = validParameter.validFile(parameters, "reference", true);
151                         if (refFastaFileName == "not open") { abort = true; }
152                         else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required."); m->mothurOutEndLine();  abort = true; }
153                       
154             //if the user changes the output directory command factory will send this info to us in the output parameter
155                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
156                                 outputDir = m->hasPath(flowFileName); //if user entered a file with a path then preserve it
157                         }
158             
159             string temp = validParameter.validFile(parameters, "threshold", false);     if (temp == "not found"){       temp = "10000"; }
160                         m->mothurConvert(temp, thresholdCount);
161             
162             barcodeSequence = validParameter.validFile(parameters, "barcode", false);   if (barcodeSequence == "not found"){    barcodeSequence = "AACCGTGTC";  }
163             
164             keySequence = validParameter.validFile(parameters, "key", false);   if (keySequence == "not found"){        keySequence = "TCAG";   }
165             
166             temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){     temp = "A";     }
167             if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
168             }
169             else {
170                 if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
171                 else if(toupper(temp[0]) == 'B'){
172                     flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
173                 else if(toupper(temp[0]) == 'I'){
174                     flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
175                 else {
176                     m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
177                 }
178             }
179                         
180                 }
181                 
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
185                 exit(1);
186         }
187 }
188 //**********************************************************************************************************************
189
190 int MakeLookupCommand::execute(){
191         try {
192         
193                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
194         
195         cout.setf(ios::fixed, ios::floatfield);
196         cout.setf(ios::showpoint);
197         
198         double gapOpening = 10;
199         int maxHomoP = 101;
200         vector<vector<double> > penaltyMatrix; penaltyMatrix.resize(maxHomoP);
201         for(int i=0;i<maxHomoP;i++){
202             penaltyMatrix[i].resize(maxHomoP, 5);
203             penaltyMatrix[i][i] = 0;
204         }
205         
206         //Create flows for reference sequences...
207         ifstream refFASTA;
208         m->openInputFile(refFastaFileName, refFASTA);                        //  *   open reference sequence file
209         map<string, vector<double> > refFlowgrams;
210         
211         while(!refFASTA.eof()){
212             if (m->control_pressed) { refFASTA.close(); return 0; }
213             
214             Sequence seq(refFASTA);  m->gobble(refFASTA);
215             
216             if (m->debug) { m->mothurOut("[DEBUG]: seq = " + seq.getName() + ".\n"); }
217             
218             string fullSequence = keySequence + barcodeSequence + seq.getAligned(); //  *   concatenate the keySequence, barcodeSequence, and
219             //      referenceSequences
220             refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); //  *   translate concatenated sequences into flowgram
221         }
222         refFASTA.close();
223         
224         vector<vector<double> > lookupTable; lookupTable.resize(1000);
225         for(int i=0;i<1000;i++){
226             lookupTable[i].resize(11, 0);
227         }
228         
229         if (m->debug) { m->mothurOut("[DEBUG]: here .\n"); }
230         //Loop through each sequence in the flow file and the error summary file.
231         ifstream flowFile;
232         m->openInputFile(flowFileName, flowFile);
233         int numFlows;
234         flowFile >> numFlows;
235         
236         if (m->debug) { m->mothurOut("[DEBUG]: numflows = " + toString(numFlows) +  ".\n"); }
237         
238         ifstream errorFile;
239         m->openInputFile(errorFileName, errorFile);
240         m->getline(errorFile); //grab headers
241         
242         string errorQuery, flowQuery, referenceName, dummy;
243         string chimera;
244         float intensity;
245         
246         vector<double> std;  std.resize(11, 0);
247         
248         while(errorFile && flowFile){
249             
250             if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
251             
252             //  * if it's chimeric, chuck it
253             errorFile >> errorQuery >> referenceName;
254             for(int i=2;i<40;i++){
255                 errorFile >> dummy;
256             }
257             errorFile >> chimera;
258            
259             
260             if(chimera == "2"){
261                 m->getline(flowFile);
262             }
263             else{
264                 
265                 flowFile >> flowQuery >> dummy;
266                 if(flowQuery != errorQuery){    cout << flowQuery << " != " << errorQuery << endl;  }
267                 
268                 map<string, vector<double> >::iterator it = refFlowgrams.find(referenceName);       //  * compare sequence to its closest reference
269                 if (it == refFlowgrams.end()) {
270                     m->mothurOut("[WARNING]: missing reference flow " + referenceName + ", ignoring flow " + flowQuery + ".\n");
271                     m->getline(flowFile); m->gobble(flowFile);
272                 }else {
273                     vector<double> refFlow = it->second;
274                     vector<double> flowgram; flowgram.resize(numFlows);
275                     
276                     if (m->debug) { m->mothurOut("[DEBUG]: flowQuery = " + flowQuery +  ".\t" + "refName " + referenceName+  ".\n"); }
277                     
278                     for(int i=0;i<numFlows;i++){
279                         flowFile >> intensity;
280                         flowgram[i] = intensity;// (int)round(100 * intensity);
281                     }
282                     m->gobble(flowFile);
283                     
284                     if (m->debug) { m->mothurOut("[DEBUG]: before align.\n"); }
285                     
286                     alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
287                     
288                     if (m->debug) { m->mothurOut("[DEBUG]: after align.\n"); }
289                     
290                     if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
291                     
292                     for(int i=0;i<flowgram.size();i++){
293                         int count = (int)round(100*flowgram[i]);
294                         if(count > 1000){count = 999;}
295                         if(abs(flowgram[i]-refFlow[i])<=0.50){
296                             lookupTable[count][int(refFlow[i])]++;               //  * build table
297                             std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
298                         }
299                     }
300                 }
301                 
302             }
303             m->gobble(errorFile);
304             m->gobble(flowFile);
305         }
306         errorFile.close(); flowFile.close();
307         
308         
309         //get probabilities
310         vector<int> counts; counts.resize(11, 0);
311         int totalCount = 0;
312         for(int i=0;i<1000;i++){
313             for(int j=0;j<11;j++){
314                 counts[j] += lookupTable[i][j];
315                 totalCount += lookupTable[i][j];
316             }
317         }
318         
319         int N = 11;
320         for(int i=0;i<11;i++){
321             if(counts[i] < thresholdCount){ N = i; break; }  //bring back
322             std[i] = sqrt(std[i]/(double)(counts[i]));  //bring back
323         }
324         
325         regress(std, N);  //bring back
326         
327         if (m->control_pressed) { return 0; }
328         
329         double minProbability = 0.1 / (double)totalCount;
330         
331         //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low
332         double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5);
333         
334         for(int i=0;i<1000;i++){
335             if (m->control_pressed) { return 0; }
336             
337             for(int j=0;j<N;j++){
338                 if(lookupTable[i][j] == 0){
339                     lookupTable[i][j] = 1;  //bring back
340                 }
341                 lookupTable[i][j] = -log(lookupTable[i][j]/double(counts[j]));  //bring back
342             }
343             
344             for(int j=N;j<11;j++){  //bring back
345                 double normalProbability = 1.0/((double)std[j] * sqrtTwoPi) * exp(-double(i - j*100)* double(i - j*100)/ double(2*std[j]*std[j]));
346                 if(normalProbability > minProbability){
347                     lookupTable[i][j] = -log(normalProbability);
348                 }
349                 else{
350                     lookupTable[i][j] = -log(minProbability);
351                 }
352             }
353         }
354         
355         
356         //calculate the probability of each homopolymer length
357         vector<double> negLogHomoProb; negLogHomoProb.resize(11, 0.00);  //bring back
358         for(int i=0;i<N;i++){
359             negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
360         }
361         regress(negLogHomoProb, N);
362         
363         if (m->control_pressed) { return 0; }
364         
365         //output data table.  column one is the probability of each homopolymer length
366         map<string, string> variables;
367         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
368                 string outputFile = getOutputFileName("lookup",variables);
369                 outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile);
370         
371         ofstream lookupFile;
372         m->openOutputFile(outputFile, lookupFile);
373         lookupFile.precision(8);
374         
375         for(int j=0;j<11;j++){
376             //        lookupFile << counts[j];
377             lookupFile << showpoint << negLogHomoProb[j]; //bring back
378             for(int i=0;i<1000;i++){
379                 lookupFile << '\t' << lookupTable[i][j];
380             }
381             lookupFile << endl;
382         }
383         lookupFile.close();
384         
385         m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n");
386          
387         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
388                 
389                 m->mothurOutEndLine();
390                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
391                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
392                 m->mothurOutEndLine();
393         
394         return 0;
395     }
396         catch(exception& e) {
397                 m->errorOut(e, "MakeLookupCommand", "execute");
398                 exit(1);
399         }
400 }
401 //******************************************************************************************************************************
402
403 vector<double> MakeLookupCommand::convertSeqToFlow(string sequence, string order){
404     try {
405         int seqLength = (int)sequence.length();
406         int numFlows = (int)order.length();
407         vector<double> flowgram;
408         
409         int orderIndex = 0;
410         int sequenceIndex = 0;
411         
412         while(orderIndex < numFlows && sequenceIndex < seqLength){
413             
414             if (m->control_pressed) { return flowgram; }
415             
416             int homopolymerLength = 1;
417             
418             char base = sequence[sequenceIndex];
419             
420             while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){
421                 homopolymerLength++;
422                 sequenceIndex++;
423             }
424             
425             sequenceIndex++;
426             
427             for(int i=orderIndex; i<orderIndex+numFlows;i++){
428                 if(order[i%numFlows] == base){
429                     //flowgram[i] = homopolymerLength;
430                     orderIndex = i%numFlows;
431                     break;
432                 }else {  flowgram.push_back(0); }
433             }
434             
435             //flowgram[orderIndex] = homopolymerLength;
436             flowgram.push_back(homopolymerLength);
437             
438             orderIndex++;
439             orderIndex = orderIndex % numFlows;
440         }
441         
442         return flowgram;
443     }
444         catch(exception& e) {
445                 m->errorOut(e, "MakeLookupCommand", "convertSeqToFlow");
446                 exit(1);
447         }
448 }
449 //******************************************************************************************************************************
450
451 int MakeLookupCommand::alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder){
452     try {
453         int numQueryFlows = (int)flowgram.size();
454         int numRefFlows = (int)refFlow.size();
455         
456             //cout << numQueryFlows << '\t' << numRefFlows << endl;
457         
458         vector<vector<double> > scoreMatrix; scoreMatrix.resize(numQueryFlows+1);
459         vector<vector<char> > directMatrix; directMatrix.resize(numQueryFlows+1);
460         
461         for(int i=0;i<=numQueryFlows;i++){
462             if (m->control_pressed) { return 0; }
463             scoreMatrix[i].resize(numRefFlows+1, 0.00);
464             directMatrix[i].resize(numRefFlows+1, 'x');
465             
466             scoreMatrix[i][0] = i * gapOpening;
467             directMatrix[i][0] = 'u';
468         }
469         
470             //cout << numQueryFlows << '\t' << numRefFlows << endl;
471         
472         
473         for(int i=0;i<=numRefFlows;i++){
474             scoreMatrix[0][i] = i * gapOpening;
475             directMatrix[0][i] = 'l';
476         }
477         
478         
479         for(int i=1;i<=numQueryFlows;i++){
480             for(int j=1;j<=numRefFlows;j++){
481                 if (m->control_pressed) { return 0; }
482                 double diagonal = 1000000000;
483                 if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){
484                     diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]];
485                 }
486                 double up = scoreMatrix[i-1][j] + gapOpening;
487                 double left = scoreMatrix[i][j-1] + gapOpening;
488                 
489                 double minScore = diagonal;
490                 char direction = 'd';
491                 
492                 if(left < diagonal && left < up){
493                     minScore = left;
494                     direction = 'l';
495                 }
496                 else if(up < diagonal && up < left){
497                     minScore = up;
498                     direction = 'u';
499                 }
500                 
501                 scoreMatrix[i][j] = minScore;
502                 directMatrix[i][j] = direction;
503                 
504             }
505         }
506         
507         int minRowIndex = numQueryFlows;
508         double minRowScore = scoreMatrix[numQueryFlows][numRefFlows];
509         for(int i=0;i<numQueryFlows;i++){
510             if (m->control_pressed) { return 0; }
511             if(scoreMatrix[i][numRefFlows] < minRowScore){
512                 minRowScore = scoreMatrix[i][numRefFlows];
513                 minRowIndex = i;
514             }
515         }
516         
517         int minColumnIndex = numRefFlows;
518         double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
519         for(int i=0;i<numRefFlows;i++){
520             if (m->control_pressed) { return 0; }
521             if(scoreMatrix[numQueryFlows][i] < minColumnScore){
522                 minColumnScore = scoreMatrix[numQueryFlows][i];
523                 minColumnIndex = i;
524             }
525         }
526         
527         
528         int i=minRowIndex;
529         int j= minColumnIndex;
530         
531         vector<double> newFlowgram;
532         vector<double> newRefFlowgram;
533        
534         while(i > 0 && j > 0){
535             if (m->control_pressed) { return 0; }
536             if(directMatrix[i][j] == 'd'){
537                 newFlowgram.push_back(flowgram[i-1]);
538                 newRefFlowgram.push_back(refFlow[j-1]);
539                 
540                 i--;
541                 j--;
542             }
543             else if(directMatrix[i][j] == 'l'){
544                 newFlowgram.push_back(0);
545                 newRefFlowgram.push_back(refFlow[j-1]);
546                 
547                 j--;
548             }
549             else if(directMatrix[i][j] == 'u'){
550                 newFlowgram.push_back(flowgram[i-1]);
551                 newRefFlowgram.push_back(0);
552                 
553                 i--;
554             }
555         }
556         
557         flowgram = newFlowgram;
558         refFlow = newRefFlowgram;
559         
560         return 0;
561     
562     }
563         catch(exception& e) {
564                 m->errorOut(e, "MakeLookupCommand", "alignFlowGrams");
565                 exit(1);
566         }
567 }
568
569 //******************************************************************************************************************************
570
571 int MakeLookupCommand::regress(vector<double>& data, int N){
572     try {
573         //fit data for larger values of N
574         double xMean = 0;
575         double yMean = 0;
576         
577         for(int i=1;i<N;i++){
578             if (m->control_pressed) { return 0; }
579             xMean += i;
580             yMean += data[i];
581         }
582         xMean /= (N-1);
583         yMean /= (N-1);
584         
585         double numerator = 0;
586         double denomenator = 0;
587         for(int i=1;i<N;i++){
588             if (m->control_pressed) { return 0; }
589             numerator += (i-xMean)*(data[i] - yMean);
590             denomenator += (i-xMean) * (i-xMean);
591         }
592         double slope = numerator / denomenator;
593         double intercept = yMean - slope * xMean;
594         
595         for(int i=N;i<11;i++){
596             data[i] = intercept + i * slope;
597         }
598         
599         return 0;
600     }
601         catch(exception& e) {
602                 m->errorOut(e, "MakeLookupCommand", "regress");
603                 exit(1);
604         }
605 }
606
607 //******************************************************************************************************************************
608
609 //**********************************************************************************************************************
610
611