]> git.donarmstrong.com Git - mothur.git/blob - trimflowscommand.cpp
added modify names parameter to set.dir
[mothur.git] / trimflowscommand.cpp
1 /*
2  *  trimflowscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimflowscommand.h"
11 #include "needlemanoverlap.hpp"
12
13
14 //**********************************************************************************************************************
15 vector<string> TrimFlowsCommand::setParameters(){       
16         try {
17                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos);
19                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
20                 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
21                 CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows);
22                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
23                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
24         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
25                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
28                 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
29                 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
30                 CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles);
31         CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
32                 CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta);
33                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
34                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
35                 
36                 vector<string> myArray;
37                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
38                 return myArray;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "TrimFlowsCommand", "setParameters");
42                 exit(1);
43         }
44 }
45 //**********************************************************************************************************************
46 string TrimFlowsCommand::getHelpString(){       
47         try {
48                 string helpString = "";
49                 helpString += "The trim.flows command reads a flowgram file and creates .....\n";
50         helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "TrimFlowsCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 string TrimFlowsCommand::getOutputPattern(string type) {
62     try {
63         string pattern = "";
64         
65         if (type == "flow") {  pattern = "[filename],[tag],flow"; } 
66         else if (type == "fasta") {  pattern = "[filename],flow.fasta"; } 
67         else if (type == "file") {  pattern = "[filename],flow.files"; }
68         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
69         
70         return pattern;
71     }
72     catch(exception& e) {
73         m->errorOut(e, "TrimFlowsCommand", "getOutputPattern");
74         exit(1);
75     }
76 }
77 //**********************************************************************************************************************
78
79 TrimFlowsCommand::TrimFlowsCommand(){   
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["flow"] = tempOutNames;
85                 outputTypes["fasta"] = tempOutNames;
86         outputTypes["file"] = tempOutNames;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
90                 exit(1);
91         }
92 }
93 //**********************************************************************************************************************
94
95 TrimFlowsCommand::TrimFlowsCommand(string option)  {
96         try {
97                 
98                 abort = false; calledHelp = false;   
99                 comboStarts = 0;
100                 
101                 //allow user to run help
102                 if(option == "help") { help(); abort = true; calledHelp = true; }
103                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
104                 
105                 else {
106                                                 
107                         vector<string> myArray = setParameters();
108                         
109                         OptionParser parser(option);
110                         map<string,string> parameters = parser.getParameters();
111                         
112                         ValidParameters validParameter;
113                         map<string,string>::iterator it;
114                         
115                         //check to make sure all parameters are valid for command
116                         for (it = parameters.begin(); it != parameters.end(); it++) { 
117                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
118                         }
119                         
120                         //initialize outputTypes
121                         vector<string> tempOutNames;
122                         outputTypes["flow"] = tempOutNames;
123                         outputTypes["fasta"] = tempOutNames;
124             outputTypes["file"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("flow");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
137                                 }
138                                 
139                                 it = parameters.find("oligos");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
145                                 }
146                                 
147                         }
148                         
149                         
150                         //check for required parameters
151                         flowFileName = validParameter.validFile(parameters, "flow", true);
152                         if (flowFileName == "not found") { 
153                                 flowFileName = m->getFlowFile(); 
154                                 if (flowFileName != "") {  m->mothurOut("Using " + flowFileName + " as input file for the flow parameter."); m->mothurOutEndLine(); }
155                                 else { 
156                                         m->mothurOut("No valid current flow file. You must provide a flow file."); m->mothurOutEndLine(); 
157                                         abort = true;
158                                 } 
159                         }else if (flowFileName == "not open") { flowFileName = ""; abort = true; }      
160                         
161                         //if the user changes the output directory command factory will send this info to us in the output parameter 
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
163                                 outputDir = ""; 
164                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
165                         }
166                         
167                         
168                         //check for optional parameter and set defaults
169                         // ...at some point should added some additional type checking...
170                         
171                         string temp;
172                         temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
173                         m->mothurConvert(temp, minFlows);  
174
175                         temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
176                         m->mothurConvert(temp, maxFlows);  
177                         
178                         
179                         temp = validParameter.validFile(parameters, "oligos", true);
180                         if (temp == "not found")        {       oligoFileName = "";             }
181                         else if(temp == "not open")     {       abort = true;                   } 
182                         else                                            {       oligoFileName = temp;   m->setOligosFile(oligoFileName); }
183                         
184                         temp = validParameter.validFile(parameters, "fasta", false);            if (temp == "not found"){       fasta = 0;              }
185                         else if(m->isTrue(temp))        {       fasta = 1;      }
186                         
187                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found"){       temp = "9";             }
188                         m->mothurConvert(temp, maxHomoP);  
189
190                         temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
191                         m->mothurConvert(temp, signal);  
192
193                         temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
194                         m->mothurConvert(temp, noise);  
195         
196                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
197                         m->mothurConvert(temp, bdiffs);
198                         
199                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
200                         m->mothurConvert(temp, pdiffs);
201                         
202             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
203                         m->mothurConvert(temp, ldiffs);
204             
205             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
206                         m->mothurConvert(temp, sdiffs);
207                         
208                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
209                         m->mothurConvert(temp, tdiffs);
210                         
211                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
212
213                         
214                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
215                         m->setProcessors(temp);
216                         m->mothurConvert(temp, processors);
217         
218                         temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){         temp = "A";     }
219             if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
220             }
221             else {
222                 if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
223                 else if(toupper(temp[0]) == 'B'){
224                     flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
225                 else if(toupper(temp[0]) == 'I'){
226                     flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
227                 else {
228                     m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
229                 }
230             }
231             
232                         if(oligoFileName == "") {       allFiles = 0;           }
233                         else                                    {       allFiles = 1;           }
234
235                         numFPrimers = 0;
236                         numRPrimers = 0;
237             numLinkers = 0;
238             numSpacers = 0;
239                 }
240         }
241         catch(exception& e) {
242                 m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
243                 exit(1);
244         }
245 }
246
247 //***************************************************************************************************************
248
249 int TrimFlowsCommand::execute(){
250         try{
251                 
252                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
253
254         map<string, string> variables; 
255                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
256         string fastaFileName = getOutputFileName("fasta",variables);
257                 if(fasta){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); }
258         
259         variables["[tag]"] = "trim";
260                 string trimFlowFileName = getOutputFileName("flow",variables);
261                 outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName);
262                 
263         variables["[tag]"] = "scrap";
264                 string scrapFlowFileName = getOutputFileName("flow",variables);
265                 outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName);
266
267                 
268                 
269                 vector<unsigned long long> flowFilePos;
270         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
271                 flowFilePos = getFlowFileBreaks();
272                 for (int i = 0; i < (flowFilePos.size()-1); i++) {
273                         lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
274                 }       
275         #else
276                 ifstream in; m->openInputFile(flowFileName, in); in >> numFlows; in.close();
277         ///////////////////////////////////////// until I fix multiple processors for windows //////////////////        
278                 processors = 1;
279         ///////////////////////////////////////// until I fix multiple processors for windows //////////////////                
280                 if (processors == 1) {
281                         lines.push_back(new linePair(0, 1000));
282                 }else {
283                         int numFlowLines;
284                         flowFilePos = m->setFilePosEachLine(flowFileName, numFlowLines);
285                         flowFilePos.erase(flowFilePos.begin() + 1); numFlowLines--;
286                         
287                         //figure out how many sequences you have to process
288                         int numSeqsPerProcessor = numFlowLines / processors;
289                         cout << numSeqsPerProcessor << '\t' << numFlowLines << endl;
290                         for (int i = 0; i < processors; i++) {
291                                 int startIndex =  i * numSeqsPerProcessor;
292                                 if(i == (processors - 1)){      numSeqsPerProcessor = numFlowLines - i * numSeqsPerProcessor;   }
293                                 lines.push_back(new linePair(flowFilePos[startIndex], numSeqsPerProcessor));
294                                 cout << flowFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
295                         }
296                 }
297         #endif
298                 
299                 vector<vector<string> > barcodePrimerComboFileNames;
300                 if(oligoFileName != ""){
301                         getOligos(barcodePrimerComboFileNames); 
302                 }
303                 
304                 if(processors == 1){
305                         driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
306                 }else{
307                         createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); 
308                 }       
309                 
310                 if (m->control_pressed) {  return 0; }                  
311                 
312                 string flowFilesFileName;
313                 ofstream output;
314                 
315                 if(allFiles){
316                         set<string> namesAlreadyProcessed;
317                         flowFilesFileName = getOutputFileName("file",variables);
318                         m->openOutputFile(flowFilesFileName, output);
319
320                         for(int i=0;i<barcodePrimerComboFileNames.size();i++){
321                                 for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
322                                         if (namesAlreadyProcessed.count(barcodePrimerComboFileNames[i][j]) == 0) {
323                         if (barcodePrimerComboFileNames[i][j] != "") {
324                             FILE * pFile;
325                             unsigned long long size;
326                             
327                             //get num bytes in file
328                             pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
329                             if (pFile==NULL) perror ("Error opening file");
330                             else{
331                                 fseek (pFile, 0, SEEK_END);
332                                 size=ftell(pFile);
333                                 fclose (pFile);
334                             }
335                             
336                             if(size < 10){
337                                 m->mothurRemove(barcodePrimerComboFileNames[i][j]);
338                             }
339                             else{
340                                 output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl;
341                                 outputNames.push_back(barcodePrimerComboFileNames[i][j]);
342                                 outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
343                             }
344                             namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]);
345                         }
346                                         }
347                                 }
348                         }
349                         output.close();
350                 }
351                 else{
352                         flowFilesFileName = getOutputFileName("file",variables);
353                         m->openOutputFile(flowFilesFileName, output);
354                         
355                         output << m->getFullPathName(trimFlowFileName) << endl;
356                         
357                         output.close();
358                 }
359                 outputTypes["file"].push_back(flowFilesFileName);
360                 outputNames.push_back(flowFilesFileName);
361                         
362                 m->mothurOutEndLine();
363                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
364                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
365                 m->mothurOutEndLine();
366                 
367                 return 0;       
368         }
369         catch(exception& e) {
370                 m->errorOut(e, "TrimSeqsCommand", "execute");
371                 exit(1);
372         }
373 }
374
375 //***************************************************************************************************************
376
377 int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > thisBarcodePrimerComboFileNames, linePair* line){
378         
379         try {
380                 ofstream trimFlowFile;
381                 m->openOutputFile(trimFlowFileName, trimFlowFile);
382                 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
383
384                 ofstream scrapFlowFile;
385                 m->openOutputFile(scrapFlowFileName, scrapFlowFile);
386                 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
387                 
388                 ofstream fastaFile;
389                 if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
390                 
391                 ifstream flowFile;
392                 m->openInputFile(flowFileName, flowFile);
393                 
394                 flowFile.seekg(line->start);
395                 
396                 if(line->start == 0){
397                         flowFile >> numFlows; m->gobble(flowFile);
398                         scrapFlowFile << maxFlows << endl;
399                         trimFlowFile << maxFlows << endl;
400                         if(allFiles){
401                                 for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
402                                         for(int j=0;j<thisBarcodePrimerComboFileNames[0].size();j++){
403                         if (thisBarcodePrimerComboFileNames[i][j] != "") {
404                             ofstream temp;
405                             m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp);
406                             temp << maxFlows << endl;
407                             temp.close();
408                         }
409                                         }
410                                 }                       
411                         }
412                 }
413                 
414                 FlowData flowData(numFlows, signal, noise, maxHomoP, flowOrder);
415                 //cout << " driver flowdata address " <<  &flowData  << &flowFile << endl;      
416                 int count = 0;
417                 bool moreSeqs = 1;
418                 
419                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
420                 
421                 while(moreSeqs) {
422                                 
423                         if (m->control_pressed) { break; }
424                         
425                         int success = 1;
426                         int currentSeqDiffs = 0;
427                         string trashCode = "";
428                         
429                         flowData.getNext(flowFile); 
430                         flowData.capFlows(maxFlows);    
431                         
432                         Sequence currSeq = flowData.getSequence();
433             //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
434                         if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
435                                 success = 0;
436                                 trashCode += 'l';
437                         }
438                         
439                         int primerIndex = 0;
440                         int barcodeIndex = 0;
441                         
442             if(numLinkers != 0){
443                 success = trimOligos.stripLinker(currSeq);
444                 if(success > ldiffs)            {       trashCode += 'k';       }
445                 else{ currentSeqDiffs += success;  }
446                 
447             }
448             
449             if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); }
450             
451                         if(barcodes.size() != 0){
452                                 success = trimOligos.stripBarcode(currSeq, barcodeIndex);
453                                 if(success > bdiffs)            {       trashCode += 'b';       }
454                                 else{ currentSeqDiffs += success;  }
455                         }
456                         
457             if(numSpacers != 0){
458                 success = trimOligos.stripSpacer(currSeq);
459                 if(success > sdiffs)            {       trashCode += 's';       }
460                 else{ currentSeqDiffs += success;  }
461                 
462             }
463             
464                         if(numFPrimers != 0){
465                                 success = trimOligos.stripForward(currSeq, primerIndex);
466                                 if(success > pdiffs)            {       trashCode += 'f';       }
467                                 else{ currentSeqDiffs += success;  }
468                         }
469                         
470                         if (currentSeqDiffs > tdiffs)   {       trashCode += 't';   }
471                         
472                         if(numRPrimers != 0){
473                                 success = trimOligos.stripReverse(currSeq);
474                                 if(!success)                            {       trashCode += 'r';       }
475                         }
476                         
477                         if(trashCode.length() == 0){
478                 string thisGroup = "";
479                 if(barcodes.size() != 0){
480                     thisGroup = barcodeNameVector[barcodeIndex];
481                     if (primers.size() != 0) { 
482                         if (primerNameVector[primerIndex] != "") { 
483                             if(thisGroup != "") {
484                                 thisGroup += "." + primerNameVector[primerIndex]; 
485                             }else {
486                                 thisGroup = primerNameVector[primerIndex]; 
487                             }
488                         } 
489                     }
490                 }
491                 
492                 int pos = thisGroup.find("ignore");
493                 if (pos == string::npos) {              
494                     flowData.printFlows(trimFlowFile);
495                     
496                     if(fasta)   { currSeq.printSequence(fastaFile);     }
497                     
498                     if(allFiles){
499                         ofstream output;
500                         m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
501                         output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
502                         
503                         flowData.printFlows(output);
504                         output.close();
505                     }
506                 }
507                         }
508                         else{
509                                 flowData.printFlows(scrapFlowFile, trashCode);
510                         }
511                                 
512                         count++;
513                         //cout << "driver" << '\t' << currSeq.getName() << endl;                        
514                         //report progress
515                         if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
516
517 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
518                         unsigned long long pos = flowFile.tellg();
519
520                         if ((pos == -1) || (pos >= line->end)) { break; }
521 #else
522                         if (flowFile.eof()) { break; }
523 #endif
524                         
525                 }
526                 //report progress
527                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
528                 
529                 trimFlowFile.close();
530                 scrapFlowFile.close();
531                 flowFile.close();
532                 if(fasta){      fastaFile.close();      }
533                 
534                 return count;
535         }
536         catch(exception& e) {
537                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
538                 exit(1);
539         }
540 }
541
542 //***************************************************************************************************************
543
544 void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
545         try {
546                 ifstream oligosFile;
547                 m->openInputFile(oligoFileName, oligosFile);
548                 
549                 string type, oligo, group;
550
551                 int indexPrimer = 0;
552                 int indexBarcode = 0;
553                 
554                 while(!oligosFile.eof()){
555                 
556                         oligosFile >> type;     //get the first column value of the row - is it a comment or a feature we are interested in?
557             
558             if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); }
559             
560                         if(type[0] == '#'){     //igore the line because there's a comment
561                                 while (!oligosFile.eof())       {       char c = oligosFile.get(); if (c == 10 || c == 13){     break;  }       }
562                 m->gobble(oligosFile);// get rest of line if there's any crap there
563                         }
564                         else{                           //there's a feature we're interested in
565                 m->gobble(oligosFile);
566                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }                                  //make type case insensitive
567
568                                 oligosFile >> oligo;    //get the DNA sequence for the feature
569
570                                 for(int i=0;i<oligo.length();i++){      //make type case insensitive and change any U's to T's
571                                         oligo[i] = toupper(oligo[i]);
572                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
573                                 }
574                 
575                 if (m->debug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); }
576                 
577                                 if(type == "FORWARD"){  //if the feature is a forward primer...
578                                         group = "";
579
580                                         while (!oligosFile.eof())       {       // get rest of line in case there is a primer name = will have the name of the primer
581                                                 char c = oligosFile.get(); 
582                                                 if (c == 10 || c == 13 || c == -1){     break;  }
583                                                 else if (c == 32 || c == 9){;} //space or tab
584                                                 else {  group += c;  }
585                                         } 
586
587                                         //have we seen this primer already?
588                                         map<string, int>::iterator itPrimer = primers.find(oligo);
589                                         if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
590
591                                         primers[oligo]=indexPrimer; indexPrimer++;
592                                         primerNameVector.push_back(group);
593
594                                 }
595                                 else if(type == "REVERSE"){
596                                         string oligoRC = reverseOligo(oligo);
597                                         revPrimer.push_back(oligoRC);
598                                 }
599                                 else if(type == "BARCODE"){
600                                         oligosFile >> group;
601
602                                         //check for repeat barcodes
603                                         map<string, int>::iterator itBar = barcodes.find(oligo);
604                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
605                     
606                     if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); }
607                     
608                                         barcodes[oligo]=indexBarcode; indexBarcode++;
609                                         barcodeNameVector.push_back(group);
610                                 }else if(type == "LINKER"){
611                                         linker.push_back(oligo);
612                                 }else if(type == "SPACER"){
613                                         spacer.push_back(oligo);
614                                 }
615                                 else{
616                                         m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  
617                                 }
618                         }
619
620                         m->gobble(oligosFile);
621                 }
622                 oligosFile.close();
623                 
624                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
625                 
626                 //add in potential combos
627                 if(barcodeNameVector.size() == 0){
628                         barcodes[""] = 0;
629                         barcodeNameVector.push_back("");                        
630                 }
631                 
632                 if(primerNameVector.size() == 0){
633                         primers[""] = 0;
634                         primerNameVector.push_back("");                 
635                 }
636                 
637                 
638                 outFlowFileNames.resize(barcodeNameVector.size());
639                 for(int i=0;i<outFlowFileNames.size();i++){
640                         outFlowFileNames[i].assign(primerNameVector.size(), "");
641                 }
642                 
643                 if(allFiles){
644
645                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
646                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
647
648                                         string primerName = primerNameVector[itPrimer->second];
649                                         string barcodeName = barcodeNameVector[itBar->second];
650                     
651                                         if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
652                                         else {                                  
653                         string comboGroupName = "";
654                         string fileName = "";
655                         
656                         map<string, string> variables; 
657                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
658                         
659                         if(primerName == ""){
660                             comboGroupName = barcodeNameVector[itBar->second];
661                             variables["[tag]"] = comboGroupName;
662                             fileName = getOutputFileName("flow", variables);
663                         }
664                         else{
665                             if(barcodeName == ""){
666                                 comboGroupName = primerNameVector[itPrimer->second];
667                             }
668                             else{
669                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
670                             }
671                             variables["[tag]"] = comboGroupName;
672                             fileName = getOutputFileName("flow", variables);
673                         }
674                         
675                         outFlowFileNames[itBar->second][itPrimer->second] = fileName;
676                         
677                         ofstream temp;
678                         m->openOutputFile(fileName, temp);
679                         temp.close();
680                     }
681                                 }
682                         }
683                 }
684                 
685                 numFPrimers = primers.size();
686                 numRPrimers = revPrimer.size();
687         numLinkers = linker.size();
688         numSpacers = spacer.size();
689                 
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
693                 exit(1);
694         }
695 }
696 //********************************************************************/
697 string TrimFlowsCommand::reverseOligo(string oligo){
698         try {
699         string reverse = "";
700         
701         for(int i=oligo.length()-1;i>=0;i--){
702             
703             if(oligo[i] == 'A')         {       reverse += 'T'; }
704             else if(oligo[i] == 'T'){   reverse += 'A'; }
705             else if(oligo[i] == 'U'){   reverse += 'A'; }
706             
707             else if(oligo[i] == 'G'){   reverse += 'C'; }
708             else if(oligo[i] == 'C'){   reverse += 'G'; }
709             
710             else if(oligo[i] == 'R'){   reverse += 'Y'; }
711             else if(oligo[i] == 'Y'){   reverse += 'R'; }
712             
713             else if(oligo[i] == 'M'){   reverse += 'K'; }
714             else if(oligo[i] == 'K'){   reverse += 'M'; }
715             
716             else if(oligo[i] == 'W'){   reverse += 'W'; }
717             else if(oligo[i] == 'S'){   reverse += 'S'; }
718             
719             else if(oligo[i] == 'B'){   reverse += 'V'; }
720             else if(oligo[i] == 'V'){   reverse += 'B'; }
721             
722             else if(oligo[i] == 'D'){   reverse += 'H'; }
723             else if(oligo[i] == 'H'){   reverse += 'D'; }
724             
725             else                                                {       reverse += 'N'; }
726         }
727         
728         
729         return reverse;
730     }
731         catch(exception& e) {
732                 m->errorOut(e, "TrimFlowsCommand", "reverseOligo");
733                 exit(1);
734         }
735 }
736
737 /**************************************************************************************************/
738 vector<unsigned long long> TrimFlowsCommand::getFlowFileBreaks() {
739
740         try{
741                         
742                 vector<unsigned long long> filePos;
743                 filePos.push_back(0);
744                                         
745                 FILE * pFile;
746                 unsigned long long size;
747                 
748                 //get num bytes in file
749                 pFile = fopen (flowFileName.c_str(),"rb");
750                 if (pFile==NULL) perror ("Error opening file");
751                 else{
752                         fseek (pFile, 0, SEEK_END);
753                         size=ftell (pFile);
754                         fclose (pFile);
755                 }
756                                 
757                 //estimate file breaks
758                 unsigned long long chunkSize = 0;
759                 chunkSize = size / processors;
760
761                 //file too small to divide by processors
762                 if (chunkSize == 0)  {  processors = 1; filePos.push_back(size); return filePos;        }
763                 
764                 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
765                 for (int i = 0; i < processors; i++) {
766                         unsigned long long spot = (i+1) * chunkSize;
767                         
768                         ifstream in;
769                         m->openInputFile(flowFileName, in);
770                         in.seekg(spot);
771                         
772                         string dummy = m->getline(in);
773                         
774                         //there was not another sequence before the end of the file
775                         unsigned long long sanityPos = in.tellg();
776                         
777 //                      if (sanityPos == -1) {  break;  }
778 //                      else {  filePos.push_back(newSpot);  }
779                         if (sanityPos == -1) {  break;  }
780                         else {  filePos.push_back(sanityPos);  }
781                         
782                         in.close();
783                 }
784                 
785                 //save end pos
786                 filePos.push_back(size);
787                 
788                 //sanity check filePos
789                 for (int i = 0; i < (filePos.size()-1); i++) {
790                         if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
791                 }
792
793                 ifstream in;
794                 m->openInputFile(flowFileName, in);
795                 in >> numFlows;
796                 m->gobble(in);
797                 //unsigned long long spot = in.tellg();
798                 //filePos[0] = spot;
799                 in.close();
800                 
801                 processors = (filePos.size() - 1);
802                 
803                 return filePos; 
804         }
805         catch(exception& e) {
806                 m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks");
807                 exit(1);
808         }
809 }
810
811 /**************************************************************************************************/
812
813 int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames){
814
815         try {
816                 processIDS.clear();
817                 int exitCommand = 1;
818                 
819 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
820                 int process = 1;
821                 
822                 //loop through and create all the processes you want
823                 while (process != processors) {
824                         int pid = fork();
825                         
826                         if (pid > 0) {
827                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
828                                 process++;
829                         }else if (pid == 0){
830                                 
831                                 vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
832                                 if(allFiles){
833                                         for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
834                                                 for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
835                             if (tempBarcodePrimerComboFileNames[i][j] != "") {
836                                 tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
837                                 ofstream temp;
838                                 m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
839                                 temp.close();
840                             }
841                                                 }
842                                         }
843                                 }
844                                 driverCreateTrim(flowFileName,
845                                                                  (trimFlowFileName + toString(getpid()) + ".temp"),
846                                                                  (scrapFlowFileName + toString(getpid()) + ".temp"),
847                                                                  (fastaFileName + toString(getpid()) + ".temp"),
848                                                                  tempBarcodePrimerComboFileNames, lines[process]);
849
850                                 exit(0);
851                         }else { 
852                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
853                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
854                                 exit(0);
855                         }
856                 }
857                 
858                 //parent do my part
859                 ofstream temp;
860                 m->openOutputFile(trimFlowFileName, temp);
861                 temp.close();
862
863                 m->openOutputFile(scrapFlowFileName, temp);
864                 temp.close();
865                 
866                 if(fasta){
867                         m->openOutputFile(fastaFileName, temp);
868                         temp.close();
869                 }
870                 
871                 driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
872
873                 //force parent to wait until all the processes are done
874                 for (int i=0;i<processIDS.size();i++) { 
875                         int temp = processIDS[i];
876                         wait(&temp);
877                 }
878 #else
879                 //////////////////////////////////////////////////////////////////////////////////////////////////////
880                 //Windows version shared memory, so be careful when passing variables through the trimFlowData struct. 
881                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
882                 //////////////////////////////////////////////////////////////////////////////////////////////////////
883                 
884                 vector<trimFlowData*> pDataArray; 
885                 DWORD   dwThreadIdArray[processors-1];
886                 HANDLE  hThreadArray[processors-1]; 
887                 
888                 //Create processor worker threads.
889                 for( int i=0; i<processors-1; i++ ){
890                         // Allocate memory for thread data.
891                         string extension = "";
892                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
893                         
894                         vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
895                         if(allFiles){
896                                 for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
897                                         for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
898                         if (tempBarcodePrimerComboFileNames[i][j] != "") {
899                             tempBarcodePrimerComboFileNames[i][j] += extension;
900                             ofstream temp;
901                             m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
902                             temp.close();
903                                                 }
904                                         }
905                                 }
906                         }
907                         
908                         trimFlowData* tempflow = new trimFlowData(flowFileName, (trimFlowFileName + extension), (scrapFlowFileName + extension), fastaFileName, flowOrder, tempBarcodePrimerComboFileNames, barcodes, primers, revPrimer, fasta, allFiles, lines[i]->start, lines[i]->end, m, signal, noise, numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, i);
909                         pDataArray.push_back(tempflow);
910                         
911                         //MyTrimFlowThreadFunction is in header. It must be global or static to work with the threads.
912                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
913                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimFlowThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
914                 }
915                 
916                 //using the main process as a worker saves time and memory
917                 ofstream temp;
918                 m->openOutputFile(trimFlowFileName, temp);
919                 temp.close();
920                 
921                 m->openOutputFile(scrapFlowFileName, temp);
922                 temp.close();
923                 
924                 if(fasta){
925                         m->openOutputFile(fastaFileName, temp);
926                         temp.close();
927                 }
928                 
929                 vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
930                 if(allFiles){
931                         for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
932                                 for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
933                     if (tempBarcodePrimerComboFileNames[i][j] != "") {
934                         tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp";
935                         ofstream temp;
936                         m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
937                         temp.close();
938                     }
939                                         
940                                 }
941                         }
942                 }
943                 
944                 //do my part - do last piece because windows is looking for eof
945                 int num = driverCreateTrim(flowFileName, (trimFlowFileName  + toString(processors-1) + ".temp"), (scrapFlowFileName  + toString(processors-1) + ".temp"), (fastaFileName + toString(processors-1) + ".temp"), tempBarcodePrimerComboFileNames, lines[processors-1]);
946                 processIDS.push_back((processors-1)); 
947                 
948                 //Wait until all threads have terminated.
949                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
950                 
951                 //Close all thread handles and free memory allocations.
952                 for(int i=0; i < pDataArray.size(); i++){
953                         num += pDataArray[i]->count;
954                         CloseHandle(hThreadArray[i]);
955                         delete pDataArray[i];
956                 }
957                 
958                 
959 #endif  
960                 //append files
961                 m->mothurOutEndLine();
962                 for(int i=0;i<processIDS.size();i++){
963                         
964                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
965                         
966                         m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName);
967                         m->mothurRemove((trimFlowFileName + toString(processIDS[i]) + ".temp"));
968 //                      m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine();
969
970                         m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName);
971                         m->mothurRemove((scrapFlowFileName + toString(processIDS[i]) + ".temp"));
972 //                      m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine();
973
974                         if(fasta){
975                                 m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName);
976                                 m->mothurRemove((fastaFileName + toString(processIDS[i]) + ".temp"));
977 //                              m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine();
978                         }
979                         if(allFiles){                                           
980                                 for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) {
981                                         for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) {
982                         if (barcodePrimerComboFileNames[j][k] != "") {
983                             m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]);
984                             m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"));
985                         }
986                                         }
987                                 }
988                         }
989                 }
990                 
991                 return exitCommand;
992         
993         }
994         catch(exception& e) {
995                 m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim");
996                 exit(1);
997         }
998 }
999
1000 //***************************************************************************************************************