]> git.donarmstrong.com Git - mothur.git/blob - trimflowscommand.cpp
added Jensen-Shannon calc. working on get.communitytype command. fixed bug in get...
[mothur.git] / trimflowscommand.cpp
1 /*
2  *  trimflowscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimflowscommand.h"
11 #include "needlemanoverlap.hpp"
12
13
14 //**********************************************************************************************************************
15 vector<string> TrimFlowsCommand::setParameters(){       
16         try {
17                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos);
19                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
20                 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
21                 CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows);
22                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
23                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
24         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
25                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
28                 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
29                 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
30                 CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles);
31         CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
32                 CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta);
33                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
34                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
35                 
36                 vector<string> myArray;
37                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
38                 return myArray;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "TrimFlowsCommand", "setParameters");
42                 exit(1);
43         }
44 }
45 //**********************************************************************************************************************
46 string TrimFlowsCommand::getHelpString(){       
47         try {
48                 string helpString = "";
49                 helpString += "The trim.flows command reads a flowgram file and creates .....\n";
50         helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "TrimFlowsCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 string TrimFlowsCommand::getOutputPattern(string type) {
62     try {
63         string pattern = "";
64         
65         if (type == "flow") {  pattern = "[filename],[tag],flow"; } 
66         else if (type == "fasta") {  pattern = "[filename],flow.fasta"; } 
67         else if (type == "file") {  pattern = "[filename],flow.files"; }
68         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
69         
70         return pattern;
71     }
72     catch(exception& e) {
73         m->errorOut(e, "TrimFlowsCommand", "getOutputPattern");
74         exit(1);
75     }
76 }
77 //**********************************************************************************************************************
78
79 TrimFlowsCommand::TrimFlowsCommand(){   
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["flow"] = tempOutNames;
85                 outputTypes["fasta"] = tempOutNames;
86         outputTypes["file"] = tempOutNames;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
90                 exit(1);
91         }
92 }
93 //**********************************************************************************************************************
94
95 TrimFlowsCommand::TrimFlowsCommand(string option)  {
96         try {
97                 
98                 abort = false; calledHelp = false;   
99                 comboStarts = 0;
100                 
101                 //allow user to run help
102                 if(option == "help") { help(); abort = true; calledHelp = true; }
103                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
104                 
105                 else {
106                                                 
107                         vector<string> myArray = setParameters();
108                         
109                         OptionParser parser(option);
110                         map<string,string> parameters = parser.getParameters();
111                         
112                         ValidParameters validParameter;
113                         map<string,string>::iterator it;
114                         
115                         //check to make sure all parameters are valid for command
116                         for (it = parameters.begin(); it != parameters.end(); it++) { 
117                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
118                         }
119                         
120                         //initialize outputTypes
121                         vector<string> tempOutNames;
122                         outputTypes["flow"] = tempOutNames;
123                         outputTypes["fasta"] = tempOutNames;
124             outputTypes["file"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("flow");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
137                                 }
138                                 
139                                 it = parameters.find("oligos");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
145                                 }
146                                 
147                         }
148                         
149                         
150                         //check for required parameters
151                         flowFileName = validParameter.validFile(parameters, "flow", true);
152                         if (flowFileName == "not found") { 
153                                 flowFileName = m->getFlowFile(); 
154                                 if (flowFileName != "") {  m->mothurOut("Using " + flowFileName + " as input file for the flow parameter."); m->mothurOutEndLine(); }
155                                 else { 
156                                         m->mothurOut("No valid current flow file. You must provide a flow file."); m->mothurOutEndLine(); 
157                                         abort = true;
158                                 } 
159                         }else if (flowFileName == "not open") { flowFileName = ""; abort = true; }      
160                         
161                         //if the user changes the output directory command factory will send this info to us in the output parameter 
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
163                                 outputDir = ""; 
164                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
165                         }
166                         
167                         
168                         //check for optional parameter and set defaults
169                         // ...at some point should added some additional type checking...
170                         
171                         string temp;
172                         temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
173                         m->mothurConvert(temp, minFlows);  
174
175                         temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
176                         m->mothurConvert(temp, maxFlows);  
177                         
178                         
179                         temp = validParameter.validFile(parameters, "oligos", true);
180                         if (temp == "not found")        {       oligoFileName = "";             }
181                         else if(temp == "not open")     {       abort = true;                   } 
182                         else                                            {       oligoFileName = temp;   m->setOligosFile(oligoFileName); }
183                         
184                         temp = validParameter.validFile(parameters, "fasta", false);            if (temp == "not found"){       fasta = 0;              }
185                         else if(m->isTrue(temp))        {       fasta = 1;      }
186                         
187                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found"){       temp = "9";             }
188                         m->mothurConvert(temp, maxHomoP);  
189
190                         temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
191                         m->mothurConvert(temp, signal);  
192
193                         temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
194                         m->mothurConvert(temp, noise);  
195         
196                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
197                         m->mothurConvert(temp, bdiffs);
198                         
199                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
200                         m->mothurConvert(temp, pdiffs);
201                         
202             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
203                         m->mothurConvert(temp, ldiffs);
204             
205             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
206                         m->mothurConvert(temp, sdiffs);
207                         
208                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
209                         m->mothurConvert(temp, tdiffs);
210                         
211                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
212
213                         
214                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
215                         m->setProcessors(temp);
216                         m->mothurConvert(temp, processors);
217         
218                         temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){         temp = "A";     }
219             if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
220             }
221             else {
222                 if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
223                 else if(toupper(temp[0]) == 'B'){
224                     flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
225                 else if(toupper(temp[0]) == 'I'){
226                     flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
227                 else {
228                     m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
229                 }
230             }
231             
232                         if(oligoFileName == "") {       allFiles = 0;           }
233                         else                                    {       allFiles = 1;           }
234
235                         numFPrimers = 0;
236                         numRPrimers = 0;
237             numLinkers = 0;
238             numSpacers = 0;
239                 }
240         }
241         catch(exception& e) {
242                 m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
243                 exit(1);
244         }
245 }
246
247 //***************************************************************************************************************
248
249 int TrimFlowsCommand::execute(){
250         try{
251                 
252                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
253
254         map<string, string> variables; 
255                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
256         string fastaFileName = getOutputFileName("fasta",variables);
257                 if(fasta){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); }
258         
259         variables["[tag]"] = "trim";
260                 string trimFlowFileName = getOutputFileName("flow",variables);
261                 outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName);
262                 
263         variables["[tag]"] = "scrap";
264                 string scrapFlowFileName = getOutputFileName("flow",variables);
265                 outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName);
266
267                 
268                 
269                 vector<unsigned long long> flowFilePos;
270         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
271                 flowFilePos = getFlowFileBreaks();
272                 for (int i = 0; i < (flowFilePos.size()-1); i++) {
273                         lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
274                 }       
275         #else
276                 ifstream in; m->openInputFile(flowFileName, in); in >> numFlows; in.close();
277         ///////////////////////////////////////// until I fix multiple processors for windows //////////////////        
278                 processors = 1;
279         ///////////////////////////////////////// until I fix multiple processors for windows //////////////////                
280                 if (processors == 1) {
281                         lines.push_back(new linePair(0, 1000));
282                 }else {
283                         int numFlowLines;
284                         flowFilePos = m->setFilePosEachLine(flowFileName, numFlowLines);
285                         flowFilePos.erase(flowFilePos.begin() + 1); numFlowLines--;
286                         
287                         //figure out how many sequences you have to process
288                         int numSeqsPerProcessor = numFlowLines / processors;
289                         cout << numSeqsPerProcessor << '\t' << numFlowLines << endl;
290                         for (int i = 0; i < processors; i++) {
291                                 int startIndex =  i * numSeqsPerProcessor;
292                                 if(i == (processors - 1)){      numSeqsPerProcessor = numFlowLines - i * numSeqsPerProcessor;   }
293                                 lines.push_back(new linePair(flowFilePos[startIndex], numSeqsPerProcessor));
294                                 cout << flowFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
295                         }
296                 }
297         #endif
298                 
299                 vector<vector<string> > barcodePrimerComboFileNames;
300                 if(oligoFileName != ""){
301                         getOligos(barcodePrimerComboFileNames); 
302                 }
303                 
304                 if(processors == 1){
305                         driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
306                 }else{
307                         createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); 
308                 }       
309                 
310                 if (m->control_pressed) {  return 0; }                  
311                 
312                 string flowFilesFileName;
313                 ofstream output;
314                 
315                 if(allFiles){
316                         set<string> namesAlreadyProcessed;
317                         flowFilesFileName = getOutputFileName("file",variables);
318                         m->openOutputFile(flowFilesFileName, output);
319
320                         for(int i=0;i<barcodePrimerComboFileNames.size();i++){
321                                 for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
322                                         if (namesAlreadyProcessed.count(barcodePrimerComboFileNames[i][j]) == 0) {
323                         if (barcodePrimerComboFileNames[i][j] != "") {
324                             FILE * pFile;
325                             unsigned long long size;
326                             
327                             //get num bytes in file
328                             pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
329                             if (pFile==NULL) perror ("Error opening file");
330                             else{
331                                 fseek (pFile, 0, SEEK_END);
332                                 size=ftell(pFile);
333                                 fclose (pFile);
334                             }
335                             
336                             if(size < 10){
337                                 m->mothurRemove(barcodePrimerComboFileNames[i][j]);
338                             }
339                             else{
340                                 output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl;
341                                 outputNames.push_back(barcodePrimerComboFileNames[i][j]);
342                                 outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
343                             }
344                             namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]);
345                         }
346                                         }
347                                 }
348                         }
349                         output.close();
350                 }
351                 else{
352                         flowFilesFileName = getOutputFileName("file",variables);
353                         m->openOutputFile(flowFilesFileName, output);
354                         
355                         output << m->getFullPathName(trimFlowFileName) << endl;
356                         
357                         output.close();
358                 }
359                 outputTypes["file"].push_back(flowFilesFileName);
360                 outputNames.push_back(flowFilesFileName);
361                         
362                 m->mothurOutEndLine();
363                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
364                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
365                 m->mothurOutEndLine();
366                 
367                 return 0;       
368         }
369         catch(exception& e) {
370                 m->errorOut(e, "TrimSeqsCommand", "execute");
371                 exit(1);
372         }
373 }
374
375 //***************************************************************************************************************
376
377 int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > thisBarcodePrimerComboFileNames, linePair* line){
378         
379         try {
380                 ofstream trimFlowFile;
381                 m->openOutputFile(trimFlowFileName, trimFlowFile);
382                 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
383
384                 ofstream scrapFlowFile;
385                 m->openOutputFile(scrapFlowFileName, scrapFlowFile);
386                 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
387                 
388                 ofstream fastaFile;
389                 if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
390                 
391                 ifstream flowFile;
392                 m->openInputFile(flowFileName, flowFile);
393                 
394                 flowFile.seekg(line->start);
395                 
396                 if(line->start == 0){
397                         flowFile >> numFlows; m->gobble(flowFile);
398                         scrapFlowFile << numFlows << endl;
399                         trimFlowFile << maxFlows << endl;
400                         if(allFiles){
401                                 for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
402                                         for(int j=0;j<thisBarcodePrimerComboFileNames[0].size();j++){
403                         if (thisBarcodePrimerComboFileNames[i][j] != "") {
404                             ofstream temp;
405                             m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp);
406                             temp << maxFlows << endl;
407                             temp.close();
408                         }
409                                         }
410                                 }                       
411                         }
412                 }
413                 
414                 FlowData flowData(numFlows, signal, noise, maxHomoP, flowOrder);
415                 //cout << " driver flowdata address " <<  &flowData  << &flowFile << endl;      
416                 int count = 0;
417                 bool moreSeqs = 1;
418                 
419                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
420                 
421                 while(moreSeqs) {
422                                 
423                         if (m->control_pressed) { break; }
424                         
425                         int success = 1;
426                         int currentSeqDiffs = 0;
427                         string trashCode = "";
428                         
429                         flowData.getNext(flowFile); 
430                         flowData.capFlows(maxFlows);    
431                         
432                         Sequence currSeq = flowData.getSequence();
433             //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
434                         if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
435                                 success = 0;
436                                 trashCode += 'l';
437                         }
438             if(!flowData.hasGoodHomoP()){       //screen to see if sequence meets the maximum homopolymer limit
439                                 success = 0;
440                                 trashCode += 'h';
441                         }
442
443                         int primerIndex = 0;
444                         int barcodeIndex = 0;
445                         
446             if(numLinkers != 0){
447                 success = trimOligos.stripLinker(currSeq);
448                 if(success > ldiffs)            {       trashCode += 'k';       }
449                 else{ currentSeqDiffs += success;  }
450                 
451             }
452             
453             if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); }
454             
455                         if(barcodes.size() != 0){
456                                 success = trimOligos.stripBarcode(currSeq, barcodeIndex);
457                                 if(success > bdiffs)            {       trashCode += 'b';       }
458                                 else{ currentSeqDiffs += success;  }
459                         }
460                         
461             if(numSpacers != 0){
462                 success = trimOligos.stripSpacer(currSeq);
463                 if(success > sdiffs)            {       trashCode += 's';       }
464                 else{ currentSeqDiffs += success;  }
465                 
466             }
467             
468                         if(numFPrimers != 0){
469                                 success = trimOligos.stripForward(currSeq, primerIndex);
470                                 if(success > pdiffs)            {       trashCode += 'f';       }
471                                 else{ currentSeqDiffs += success;  }
472                         }
473                         
474                         if (currentSeqDiffs > tdiffs)   {       trashCode += 't';   }
475                         
476                         if(numRPrimers != 0){
477                                 success = trimOligos.stripReverse(currSeq);
478                                 if(!success)                            {       trashCode += 'r';       }
479                         }
480                         
481                         if(trashCode.length() == 0){
482                 string thisGroup = "";
483                 if(barcodes.size() != 0){
484                     thisGroup = barcodeNameVector[barcodeIndex];
485                     if (primers.size() != 0) { 
486                         if (primerNameVector[primerIndex] != "") { 
487                             if(thisGroup != "") {
488                                 thisGroup += "." + primerNameVector[primerIndex]; 
489                             }else {
490                                 thisGroup = primerNameVector[primerIndex]; 
491                             }
492                         } 
493                     }
494                 }
495                 
496                 int pos = thisGroup.find("ignore");
497                 if (pos == string::npos) {              
498                     flowData.printFlows(trimFlowFile);
499                     
500                     if(fasta)   { currSeq.printSequence(fastaFile);     }
501                     
502                     if(allFiles){
503                         ofstream output;
504                         m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
505                         output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
506                         
507                         flowData.printFlows(output);
508                         output.close();
509                     }
510                 }
511                         }
512                         else{
513                                 flowData.printFlows(scrapFlowFile, trashCode);
514                         }
515                                 
516                         count++;
517                         //cout << "driver" << '\t' << currSeq.getName() << endl;                        
518                         //report progress
519                         if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
520
521 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
522                         unsigned long long pos = flowFile.tellg();
523
524                         if ((pos == -1) || (pos >= line->end)) { break; }
525 #else
526                         if (flowFile.eof()) { break; }
527 #endif
528                         
529                 }
530                 //report progress
531                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
532                 
533                 trimFlowFile.close();
534                 scrapFlowFile.close();
535                 flowFile.close();
536                 if(fasta){      fastaFile.close();      }
537                 
538                 return count;
539         }
540         catch(exception& e) {
541                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
542                 exit(1);
543         }
544 }
545
546 //***************************************************************************************************************
547
548 void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
549         try {
550                 ifstream oligosFile;
551                 m->openInputFile(oligoFileName, oligosFile);
552                 
553                 string type, oligo, group;
554
555                 int indexPrimer = 0;
556                 int indexBarcode = 0;
557                 
558                 while(!oligosFile.eof()){
559                 
560                         oligosFile >> type;     //get the first column value of the row - is it a comment or a feature we are interested in?
561             
562             if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); }
563             
564                         if(type[0] == '#'){     //igore the line because there's a comment
565                                 while (!oligosFile.eof())       {       char c = oligosFile.get(); if (c == 10 || c == 13){     break;  }       }
566                 m->gobble(oligosFile);// get rest of line if there's any crap there
567                         }
568                         else{                           //there's a feature we're interested in
569                 m->gobble(oligosFile);
570                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }                                  //make type case insensitive
571
572                                 oligosFile >> oligo;    //get the DNA sequence for the feature
573
574                                 for(int i=0;i<oligo.length();i++){      //make type case insensitive and change any U's to T's
575                                         oligo[i] = toupper(oligo[i]);
576                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
577                                 }
578                 
579                 if (m->debug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); }
580                 
581                                 if(type == "FORWARD"){  //if the feature is a forward primer...
582                                         group = "";
583
584                                         while (!oligosFile.eof())       {       // get rest of line in case there is a primer name = will have the name of the primer
585                                                 char c = oligosFile.get(); 
586                                                 if (c == 10 || c == 13 || c == -1){     break;  }
587                                                 else if (c == 32 || c == 9){;} //space or tab
588                                                 else {  group += c;  }
589                                         } 
590
591                                         //have we seen this primer already?
592                                         map<string, int>::iterator itPrimer = primers.find(oligo);
593                                         if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
594
595                                         primers[oligo]=indexPrimer; indexPrimer++;
596                                         primerNameVector.push_back(group);
597
598                                 }
599                                 else if(type == "REVERSE"){
600                                         string oligoRC = reverseOligo(oligo);
601                                         revPrimer.push_back(oligoRC);
602                     if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); }
603                                 }
604                                 else if(type == "BARCODE"){
605                                         oligosFile >> group;
606
607                                         //check for repeat barcodes
608                                         map<string, int>::iterator itBar = barcodes.find(oligo);
609                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
610                     
611                     if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); }
612                     
613                                         barcodes[oligo]=indexBarcode; indexBarcode++;
614                                         barcodeNameVector.push_back(group);
615                                 }else if(type == "LINKER"){
616                                         linker.push_back(oligo);
617                                 }else if(type == "SPACER"){
618                                         spacer.push_back(oligo);
619                                 }
620                                 else{
621                                         m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  
622                                 }
623                         }
624
625                         m->gobble(oligosFile);
626                 }
627                 oligosFile.close();
628                 
629                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
630                 
631                 //add in potential combos
632                 if(barcodeNameVector.size() == 0){
633                         barcodes[""] = 0;
634                         barcodeNameVector.push_back("");                        
635                 }
636                 
637                 if(primerNameVector.size() == 0){
638                         primers[""] = 0;
639                         primerNameVector.push_back("");                 
640                 }
641
642                 outFlowFileNames.resize(barcodeNameVector.size());
643                 for(int i=0;i<outFlowFileNames.size();i++){
644                         outFlowFileNames[i].assign(primerNameVector.size(), "");
645                 }
646                 
647                 if(allFiles){
648
649                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
650                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
651
652                                         string primerName = primerNameVector[itPrimer->second];
653                                         string barcodeName = barcodeNameVector[itBar->second];
654                     
655                                         if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
656                                         else {                                  
657                         string comboGroupName = "";
658                         string fileName = "";
659                         
660                         map<string, string> variables; 
661                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
662                         
663                         if(primerName == ""){
664                             comboGroupName = barcodeNameVector[itBar->second];
665                             variables["[tag]"] = comboGroupName;
666                             fileName = getOutputFileName("flow", variables);
667                         }
668                         else{
669                             if(barcodeName == ""){
670                                 comboGroupName = primerNameVector[itPrimer->second];
671                             }
672                             else{
673                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
674                             }
675                             variables["[tag]"] = comboGroupName;
676                             fileName = getOutputFileName("flow", variables);
677                         }
678                         
679                         outFlowFileNames[itBar->second][itPrimer->second] = fileName;
680                         
681                         ofstream temp;
682                         m->openOutputFile(fileName, temp);
683                         temp.close();
684                     }
685                                 }
686                         }
687                 }
688                 
689                 numFPrimers = primers.size();
690                 numRPrimers = revPrimer.size();
691         numLinkers = linker.size();
692         numSpacers = spacer.size();
693                 
694         }
695         catch(exception& e) {
696                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
697                 exit(1);
698         }
699 }
700 //********************************************************************/
701 string TrimFlowsCommand::reverseOligo(string oligo){
702         try {
703         string reverse = "";
704         
705         for(int i=oligo.length()-1;i>=0;i--){
706             
707             if(oligo[i] == 'A')         {       reverse += 'T'; }
708             else if(oligo[i] == 'T'){   reverse += 'A'; }
709             else if(oligo[i] == 'U'){   reverse += 'A'; }
710             
711             else if(oligo[i] == 'G'){   reverse += 'C'; }
712             else if(oligo[i] == 'C'){   reverse += 'G'; }
713             
714             else if(oligo[i] == 'R'){   reverse += 'Y'; }
715             else if(oligo[i] == 'Y'){   reverse += 'R'; }
716             
717             else if(oligo[i] == 'M'){   reverse += 'K'; }
718             else if(oligo[i] == 'K'){   reverse += 'M'; }
719             
720             else if(oligo[i] == 'W'){   reverse += 'W'; }
721             else if(oligo[i] == 'S'){   reverse += 'S'; }
722             
723             else if(oligo[i] == 'B'){   reverse += 'V'; }
724             else if(oligo[i] == 'V'){   reverse += 'B'; }
725             
726             else if(oligo[i] == 'D'){   reverse += 'H'; }
727             else if(oligo[i] == 'H'){   reverse += 'D'; }
728             
729             else                                                {       reverse += 'N'; }
730         }
731         
732         
733         return reverse;
734     }
735         catch(exception& e) {
736                 m->errorOut(e, "TrimFlowsCommand", "reverseOligo");
737                 exit(1);
738         }
739 }
740
741 /**************************************************************************************************/
742 vector<unsigned long long> TrimFlowsCommand::getFlowFileBreaks() {
743
744         try{
745                         
746                 vector<unsigned long long> filePos;
747                 filePos.push_back(0);
748                                         
749                 FILE * pFile;
750                 unsigned long long size;
751                 
752                 //get num bytes in file
753                 pFile = fopen (flowFileName.c_str(),"rb");
754                 if (pFile==NULL) perror ("Error opening file");
755                 else{
756                         fseek (pFile, 0, SEEK_END);
757                         size=ftell (pFile);
758                         fclose (pFile);
759                 }
760                                 
761                 //estimate file breaks
762                 unsigned long long chunkSize = 0;
763                 chunkSize = size / processors;
764
765                 //file too small to divide by processors
766                 if (chunkSize == 0)  {  processors = 1; filePos.push_back(size); return filePos;        }
767                 
768                 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
769                 for (int i = 0; i < processors; i++) {
770                         unsigned long long spot = (i+1) * chunkSize;
771                         
772                         ifstream in;
773                         m->openInputFile(flowFileName, in);
774                         in.seekg(spot);
775                         
776                         string dummy = m->getline(in);
777                         
778                         //there was not another sequence before the end of the file
779                         unsigned long long sanityPos = in.tellg();
780                         
781 //                      if (sanityPos == -1) {  break;  }
782 //                      else {  filePos.push_back(newSpot);  }
783                         if (sanityPos == -1) {  break;  }
784                         else {  filePos.push_back(sanityPos);  }
785                         
786                         in.close();
787                 }
788                 
789                 //save end pos
790                 filePos.push_back(size);
791                 
792                 //sanity check filePos
793                 for (int i = 0; i < (filePos.size()-1); i++) {
794                         if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
795                 }
796
797                 ifstream in;
798                 m->openInputFile(flowFileName, in);
799                 in >> numFlows;
800                 m->gobble(in);
801                 //unsigned long long spot = in.tellg();
802                 //filePos[0] = spot;
803                 in.close();
804                 
805                 processors = (filePos.size() - 1);
806                 
807                 return filePos; 
808         }
809         catch(exception& e) {
810                 m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks");
811                 exit(1);
812         }
813 }
814
815 /**************************************************************************************************/
816
817 int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames){
818
819         try {
820                 processIDS.clear();
821                 int exitCommand = 1;
822                 
823 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
824                 int process = 1;
825                 
826                 //loop through and create all the processes you want
827                 while (process != processors) {
828                         int pid = fork();
829                         
830                         if (pid > 0) {
831                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
832                                 process++;
833                         }else if (pid == 0){
834                                 
835                                 vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
836                                 if(allFiles){
837                                         for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
838                                                 for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
839                             if (tempBarcodePrimerComboFileNames[i][j] != "") {
840                                 tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
841                                 ofstream temp;
842                                 m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
843                                 temp.close();
844                             }
845                                                 }
846                                         }
847                                 }
848                                 driverCreateTrim(flowFileName,
849                                                                  (trimFlowFileName + toString(getpid()) + ".temp"),
850                                                                  (scrapFlowFileName + toString(getpid()) + ".temp"),
851                                                                  (fastaFileName + toString(getpid()) + ".temp"),
852                                                                  tempBarcodePrimerComboFileNames, lines[process]);
853
854                                 exit(0);
855                         }else { 
856                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
857                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
858                                 exit(0);
859                         }
860                 }
861                 
862                 //parent do my part
863                 ofstream temp;
864                 m->openOutputFile(trimFlowFileName, temp);
865                 temp.close();
866
867                 m->openOutputFile(scrapFlowFileName, temp);
868                 temp.close();
869                 
870                 if(fasta){
871                         m->openOutputFile(fastaFileName, temp);
872                         temp.close();
873                 }
874                 
875                 driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
876
877                 //force parent to wait until all the processes are done
878                 for (int i=0;i<processIDS.size();i++) { 
879                         int temp = processIDS[i];
880                         wait(&temp);
881                 }
882 #else
883                 //////////////////////////////////////////////////////////////////////////////////////////////////////
884                 //Windows version shared memory, so be careful when passing variables through the trimFlowData struct. 
885                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
886                 //////////////////////////////////////////////////////////////////////////////////////////////////////
887                 
888                 vector<trimFlowData*> pDataArray; 
889                 DWORD   dwThreadIdArray[processors-1];
890                 HANDLE  hThreadArray[processors-1]; 
891                 
892                 //Create processor worker threads.
893                 for( int i=0; i<processors-1; i++ ){
894                         // Allocate memory for thread data.
895                         string extension = "";
896                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
897                         
898                         vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
899                         if(allFiles){
900                                 for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
901                                         for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
902                         if (tempBarcodePrimerComboFileNames[i][j] != "") {
903                             tempBarcodePrimerComboFileNames[i][j] += extension;
904                             ofstream temp;
905                             m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
906                             temp.close();
907                                                 }
908                                         }
909                                 }
910                         }
911                         
912                         trimFlowData* tempflow = new trimFlowData(flowFileName, (trimFlowFileName + extension), (scrapFlowFileName + extension), fastaFileName, flowOrder, tempBarcodePrimerComboFileNames, barcodes, primers, revPrimer, fasta, allFiles, lines[i]->start, lines[i]->end, m, signal, noise, numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, i);
913                         pDataArray.push_back(tempflow);
914                         
915                         //MyTrimFlowThreadFunction is in header. It must be global or static to work with the threads.
916                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
917                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimFlowThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
918                 }
919                 
920                 //using the main process as a worker saves time and memory
921                 ofstream temp;
922                 m->openOutputFile(trimFlowFileName, temp);
923                 temp.close();
924                 
925                 m->openOutputFile(scrapFlowFileName, temp);
926                 temp.close();
927                 
928                 if(fasta){
929                         m->openOutputFile(fastaFileName, temp);
930                         temp.close();
931                 }
932                 
933                 vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
934                 if(allFiles){
935                         for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
936                                 for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
937                     if (tempBarcodePrimerComboFileNames[i][j] != "") {
938                         tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp";
939                         ofstream temp;
940                         m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
941                         temp.close();
942                     }
943                                         
944                                 }
945                         }
946                 }
947                 
948                 //do my part - do last piece because windows is looking for eof
949                 int num = driverCreateTrim(flowFileName, (trimFlowFileName  + toString(processors-1) + ".temp"), (scrapFlowFileName  + toString(processors-1) + ".temp"), (fastaFileName + toString(processors-1) + ".temp"), tempBarcodePrimerComboFileNames, lines[processors-1]);
950                 processIDS.push_back((processors-1)); 
951                 
952                 //Wait until all threads have terminated.
953                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
954                 
955                 //Close all thread handles and free memory allocations.
956                 for(int i=0; i < pDataArray.size(); i++){
957                         num += pDataArray[i]->count;
958                         CloseHandle(hThreadArray[i]);
959                         delete pDataArray[i];
960                 }
961                 
962                 
963 #endif  
964                 //append files
965                 m->mothurOutEndLine();
966                 for(int i=0;i<processIDS.size();i++){
967                         
968                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
969                         
970                         m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName);
971                         m->mothurRemove((trimFlowFileName + toString(processIDS[i]) + ".temp"));
972 //                      m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine();
973
974                         m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName);
975                         m->mothurRemove((scrapFlowFileName + toString(processIDS[i]) + ".temp"));
976 //                      m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine();
977
978                         if(fasta){
979                                 m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName);
980                                 m->mothurRemove((fastaFileName + toString(processIDS[i]) + ".temp"));
981 //                              m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine();
982                         }
983                         if(allFiles){                                           
984                                 for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) {
985                                         for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) {
986                         if (barcodePrimerComboFileNames[j][k] != "") {
987                             m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]);
988                             m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"));
989                         }
990                                         }
991                                 }
992                         }
993                 }
994                 
995                 return exitCommand;
996         
997         }
998         catch(exception& e) {
999                 m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim");
1000                 exit(1);
1001         }
1002 }
1003
1004 //***************************************************************************************************************