]> git.donarmstrong.com Git - mothur.git/blob - classifyseqscommand.cpp
1.23.0
[mothur.git] / classifyseqscommand.cpp
1 /*
2  *  classifyseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/2/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyseqscommand.h"
11
12
13
14 //**********************************************************************************************************************
15 vector<string> ClassifySeqsCommand::setParameters(){    
16         try {
17                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
18                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance", "kmer", "", "", "",false,false); parameters.push_back(psearch);
23                 CommandParameter pksize("ksize", "Number", "", "8", "", "", "",false,false); parameters.push_back(pksize);
24                 CommandParameter pmethod("method", "Multiple", "bayesian-knn", "bayesian", "", "", "",false,false); parameters.push_back(pmethod);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
27                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
28                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
29                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
30                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
31                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
32                 CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
33                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
34                 CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted);
35                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
36                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
37                 
38                 vector<string> myArray;
39                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
40                 return myArray;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ClassifySeqsCommand", "setParameters");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 string ClassifySeqsCommand::getHelpString(){    
49         try {
50                 string helpString = "";
51                 helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n";
52                 helpString += "The classify.seqs command parameters are reference, fasta, name, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
53                 helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
54                 helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast and distance. The default is kmer.\n";
55                 helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n";
56                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
57                 helpString += "The method parameter allows you to specify classification method to use.  Your options are: bayesian and knn. The default is bayesian.\n";
58                 helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n";
59                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
60 #ifdef USE_MPI
61                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
62 #endif
63                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
64                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
65                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
66                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
67                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
68                 helpString += "The numwanted parameter allows you to specify the number of sequence matches you want with the knn method.  The default is 10.\n";
69                 helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n";
70                 helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
71                 helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n";
72                 helpString += "The classify.seqs command should be in the following format: \n";
73                 helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
74                 helpString += "Example classify.seqs(fasta=amazon.fasta, reference=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n";
75                 helpString += "The .taxonomy file consists of 2 columns: 1 = your sequence name, 2 = the taxonomy for your sequence. \n";
76                 helpString += "The .tax.summary is a summary of the different taxonomies represented in your fasta file. \n";
77                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
78                 return helpString;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ClassifySeqsCommand", "getHelpString");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 ClassifySeqsCommand::ClassifySeqsCommand(){     
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["taxonomy"] = tempOutNames;
92                 outputTypes["taxsummary"] = tempOutNames;
93                 outputTypes["matchdist"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
102         try {
103                 abort = false; calledHelp = false;   
104                 rdb = ReferenceDB::getInstance();
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string, string> parameters = parser.getParameters(); 
115                         
116                         ValidParameters validParameter("classify.seqs");
117                         map<string, string>::iterator it;
118                         
119                         //check to make sure all parameters are valid for command
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         //initialize outputTypes
125                         vector<string> tempOutNames;
126                         outputTypes["taxonomy"] = tempOutNames;
127                         outputTypes["taxsummary"] = tempOutNames;
128                         outputTypes["matchdist"] = tempOutNames;
129                         
130                         //if the user changes the output directory command factory will send this info to us in the output parameter 
131                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
132                         
133                         //if the user changes the input directory command factory will send this info to us in the output parameter 
134                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
135                         if (inputDir == "not found"){   inputDir = "";          }
136                         else {
137                                 string path;
138                                 it = parameters.find("reference");
139                                 //user has given a template file
140                                 if(it != parameters.end()){ 
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
144                                 }
145                                 
146                                 it = parameters.find("taxonomy");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
152                                 }
153                                 
154                                 it = parameters.find("group");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
160                                 }
161                         }
162
163                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
164                         if (fastaFileName == "not found") {                             
165                                 //if there is a current fasta file, use it
166                                 string filename = m->getFastaFile(); 
167                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
168                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
169                         }
170                         else { 
171                                 m->splitAtDash(fastaFileName, fastaFileNames);
172                                 
173                                 //go through files and make sure they are good, if not, then disregard them
174                                 for (int i = 0; i < fastaFileNames.size(); i++) {
175                                         
176                                         bool ignore = false;
177                                         if (fastaFileNames[i] == "current") { 
178                                                 fastaFileNames[i] = m->getFastaFile(); 
179                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
180                                                 else {  
181                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
182                                                         //erase from file list
183                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
184                                                         i--;
185                                                 }
186                                         }
187                                         
188                                         if (!ignore) {
189                                                 
190                                                 if (inputDir != "") {
191                                                         string path = m->hasPath(fastaFileNames[i]);
192                                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
194                                                 }
195                                                 
196                                                 int ableToOpen;
197                                                 
198                                                 ifstream in;
199                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
200                                         
201                                                 //if you can't open it, try default location
202                                                 if (ableToOpen == 1) {
203                                                         if (m->getDefaultPath() != "") { //default path is set
204                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
205                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
206                                                                 ifstream in2;
207                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
208                                                                 in2.close();
209                                                                 fastaFileNames[i] = tryPath;
210                                                         }
211                                                 }
212                                                 
213                                                 if (ableToOpen == 1) {
214                                                         if (m->getOutputDir() != "") { //default path is set
215                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
216                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
217                                                                 ifstream in2;
218                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
219                                                                 in2.close();
220                                                                 fastaFileNames[i] = tryPath;
221                                                         }
222                                                 }
223                                                 
224                                                 in.close();
225                                                 
226                                                 if (ableToOpen == 1) { 
227                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
228                                                         //erase from file list
229                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
230                                                         i--;
231                                                 }else {
232                                                         m->setFastaFile(fastaFileNames[i]);
233                                                 }
234                                         }
235                                         
236                                 }
237                                 
238                                 //make sure there is at least one valid file left
239                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
240                         }
241
242                         namefile = validParameter.validFile(parameters, "name", false);
243                         if (namefile == "not found") { namefile = "";  }
244
245                         else { 
246                                 m->splitAtDash(namefile, namefileNames);
247                                 
248                                 //go through files and make sure they are good, if not, then disregard them
249                                 for (int i = 0; i < namefileNames.size(); i++) {
250                                         bool ignore = false;
251                                         if (namefileNames[i] == "current") { 
252                                                 namefileNames[i] = m->getNameFile(); 
253                                                 if (namefileNames[i] != "") {  m->mothurOut("Using " + namefileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
254                                                 else {  
255                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
256                                                         //erase from file list
257                                                         namefileNames.erase(namefileNames.begin()+i);
258                                                         i--;
259                                                 }
260                                         }
261                                         
262                                         if (!ignore) {
263                                                 
264                                                 if (inputDir != "") {
265                                                         string path = m->hasPath(namefileNames[i]);
266                                                         //if the user has not given a path then, add inputdir. else leave path alone.
267                                                         if (path == "") {       namefileNames[i] = inputDir + namefileNames[i];         }
268                                                 }
269                                                 int ableToOpen;
270                                                 
271                                                 ifstream in;
272                                                 ableToOpen = m->openInputFile(namefileNames[i], in, "noerror");
273                                         
274                                                 //if you can't open it, try default location
275                                                 if (ableToOpen == 1) {
276                                                         if (m->getDefaultPath() != "") { //default path is set
277                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(namefileNames[i]);
278                                                                 m->mothurOut("Unable to open " + namefileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
279                                                                 ifstream in2;
280                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
281                                                                 in2.close();
282                                                                 namefileNames[i] = tryPath;
283                                                         }
284                                                 }
285                                                 
286                                                 if (ableToOpen == 1) {
287                                                         if (m->getOutputDir() != "") { //default path is set
288                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(namefileNames[i]);
289                                                                 m->mothurOut("Unable to open " + namefileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
290                                                                 ifstream in2;
291                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
292                                                                 in2.close();
293                                                                 namefileNames[i] = tryPath;
294                                                         }
295                                                 }
296                                                 in.close();
297                                                 
298                                                 if (ableToOpen == 1) { 
299                                                         m->mothurOut("Unable to open " + namefileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();  abort = true;
300                                                         //erase from file list
301                                                         namefileNames.erase(namefileNames.begin()+i);
302                                                         i--;
303                                                 }else {
304                                                         m->setNameFile(namefileNames[i]);
305                                                 }
306                                         }
307                                 }
308                         }
309
310                         if (namefile != "") {
311                                 if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); }
312                         }
313                         
314                         groupfile = validParameter.validFile(parameters, "group", false);
315                         if (groupfile == "not found") { groupfile = "";  }
316                         else { 
317                                 m->splitAtDash(groupfile, groupfileNames);
318                                 
319                                 //go through files and make sure they are good, if not, then disregard them
320                                 for (int i = 0; i < groupfileNames.size(); i++) {
321                                         if (inputDir != "") {
322                                                 string path = m->hasPath(groupfileNames[i]);
323                                                 //if the user has not given a path then, add inputdir. else leave path alone.
324                                                 if (path == "") {       groupfileNames[i] = inputDir + groupfileNames[i];               }
325                                         }
326                                         int ableToOpen;
327                                         
328                                         ifstream in;
329                                         ableToOpen = m->openInputFile(groupfileNames[i], in, "noerror");
330                                 
331                                         //if you can't open it, try default location
332                                         if (ableToOpen == 1) {
333                                                 if (m->getDefaultPath() != "") { //default path is set
334                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(groupfileNames[i]);
335                                                         m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
336                                                         ifstream in2;
337                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
338                                                         in2.close();
339                                                         groupfileNames[i] = tryPath;
340                                                 }
341                                         }
342                                         
343                                         if (ableToOpen == 1) {
344                                                 if (m->getOutputDir() != "") { //default path is set
345                                                         string tryPath = m->getOutputDir() + m->getSimpleName(groupfileNames[i]);
346                                                         m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
347                                                         ifstream in2;
348                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
349                                                         in2.close();
350                                                         groupfileNames[i] = tryPath;
351                                                 }
352                                         }
353                                         
354                                         in.close();
355                                         
356                                         if (ableToOpen == 1) { 
357                                                 m->mothurOut("Unable to open " + groupfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); groupfileNames[i] = "";
358                                                 //erase from file list
359                                                 groupfileNames.erase(groupfileNames.begin()+i);
360                                                 i--;
361                                         }else {
362                                                 m->setGroupFile(groupfileNames[i]);
363                                         }
364                                 }
365                         }
366
367                         if (groupfile != "") {
368                                 if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); }
369                         }else {
370                                 for (int i = 0; i < fastaFileNames.size(); i++) {  groupfileNames.push_back("");  }
371                         }
372                         
373                         //check for optional parameter and set defaults
374                         // ...at some point should added some additional type checking...
375                         string temp;
376                         temp = validParameter.validFile(parameters, "ksize", false);            if (temp == "not found"){       temp = "8";                             }
377                         m->mothurConvert(temp, kmerSize); 
378                         
379                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
380                         m->setProcessors(temp);
381                         m->mothurConvert(temp, processors); 
382                         
383                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
384                         save = m->isTrue(temp); 
385                         rdb->save = save; 
386                         if (save) { //clear out old references
387                                 rdb->clearMemory();     
388                         }
389                         
390                         //this has to go after save so that if the user sets save=t and provides no reference we abort
391                         templateFileName = validParameter.validFile(parameters, "reference", true);
392                         if (templateFileName == "not found") { 
393                                 //check for saved reference sequences
394                                 if (rdb->referenceSeqs.size() != 0) {
395                                         templateFileName = "saved";
396                                 }else {
397                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the classify.seqs command."); 
398                                         m->mothurOutEndLine();
399                                         abort = true; 
400                                 }
401                         }else if (templateFileName == "not open") { abort = true; }     
402                         else {  if (save) {     rdb->setSavedReference(templateFileName);       }       }
403                         
404                         //this has to go after save so that if the user sets save=t and provides no reference we abort
405                         taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
406                         if (taxonomyFileName == "not found") { 
407                                 //check for saved reference sequences
408                                 if (rdb->wordGenusProb.size() != 0) {
409                                         taxonomyFileName = "saved";
410                                 }else {
411                                         m->mothurOut("[ERROR]: You don't have any saved taxonomy information and the taxonomy parameter is a required for the classify.seqs command."); 
412                                         m->mothurOutEndLine();
413                                         abort = true; 
414                                 }
415                         }else if (taxonomyFileName == "not open") { abort = true; }     
416                         else {  if (save) {     rdb->setSavedTaxonomy(taxonomyFileName);        }       }
417                         
418                         search = validParameter.validFile(parameters, "search", false);         if (search == "not found"){     search = "kmer";                }
419                         
420                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "bayesian";    }
421                         
422                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
423                         m->mothurConvert(temp, match);  
424                         
425                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
426                         m->mothurConvert(temp, misMatch);  
427                         
428                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
429                         m->mothurConvert(temp, gapOpen);  
430                         
431                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
432                         m->mothurConvert(temp, gapExtend); 
433                         
434                         temp = validParameter.validFile(parameters, "numwanted", false);        if (temp == "not found"){       temp = "10";                    }
435                         m->mothurConvert(temp, numWanted);
436                         
437                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found"){       temp = "0";                             }
438                         m->mothurConvert(temp, cutoff);
439                         
440                         temp = validParameter.validFile(parameters, "probs", false);            if (temp == "not found"){       temp = "true";                  }
441                         probs = m->isTrue(temp);
442                         
443                         temp = validParameter.validFile(parameters, "iters", false);            if (temp == "not found") { temp = "100";                        }
444                         m->mothurConvert(temp, iters); 
445
446
447                         
448                         if ((method == "bayesian") && (search != "kmer"))  { 
449                                 m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
450                                 search = "kmer";
451                         }
452                 }
453                 
454         }
455         catch(exception& e) {
456                 m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
457                 exit(1);
458         }
459 }
460
461 //**********************************************************************************************************************
462 ClassifySeqsCommand::~ClassifySeqsCommand(){    
463         if (abort == false) {
464                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
465         }
466 }
467 //**********************************************************************************************************************
468
469 int ClassifySeqsCommand::execute(){
470         try {
471                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
472                 
473                 if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());           }
474                 else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
475                 else {
476                         m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
477                         m->mothurOutEndLine();
478                         classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());   
479                 }
480                 
481                 if (m->control_pressed) { delete classify; return 0; }
482                                 
483                 for (int s = 0; s < fastaFileNames.size(); s++) {
484                 
485                         m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
486                         
487                         string baseTName = taxonomyFileName;
488                         if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy();  }
489                         
490                         string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
491                         RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
492                         if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
493                         RippedTaxName +=  "."; 
494                 
495                         if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
496                         string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
497                         string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
498                         string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
499                         
500                         if ((method == "knn") && (search == "distance")) { 
501                                 string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "match.dist";
502                                 classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
503                         }
504                         
505                         outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
506                         outputNames.push_back(taxSummary);      outputTypes["taxsummary"].push_back(taxSummary);
507                         
508                         int start = time(NULL);
509                         int numFastaSeqs = 0;
510                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
511                         
512 #ifdef USE_MPI  
513                                 int pid, numSeqsPerProcessor; 
514                                 int tag = 2001;
515                                 vector<unsigned long long> MPIPos;
516                                 
517                                 MPI_Status status; 
518                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
519                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
520
521                                 MPI_File inMPI;
522                                 MPI_File outMPINewTax;
523                                 MPI_File outMPITempTax;
524                                                         
525                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
526                                 int inMode=MPI_MODE_RDONLY; 
527                                 
528                                 char outNewTax[1024];
529                                 strcpy(outNewTax, newTaxonomyFile.c_str());
530                                 
531                                 char outTempTax[1024];
532                                 strcpy(outTempTax, tempTaxonomyFile.c_str());
533                                 
534                                 char inFileName[1024];
535                                 strcpy(inFileName, fastaFileNames[s].c_str());
536
537                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
538                                 MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax);
539                                 MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax);
540                                 
541                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify;  return 0;  }
542                                 
543                                 if (pid == 0) { //you are the root process 
544                                         
545                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
546                                         
547                                         //send file positions to all processes
548                                         for(int i = 1; i < processors; i++) { 
549                                                 MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
550                                                 MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
551                                         }
552                                         
553                                         //figure out how many sequences you have to align
554                                         numSeqsPerProcessor = numFastaSeqs / processors;
555                                         int startIndex =  pid * numSeqsPerProcessor;
556                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
557                                         
558                                 
559                                         //align your part
560                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
561                                         
562                                         if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
563                                         
564                                         for (int i = 1; i < processors; i++) {
565                                                 int done;
566                                                 MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
567                                         }
568                                 }else{ //you are a child process
569                                         MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
570                                         MPIPos.resize(numFastaSeqs+1);
571                                         MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
572                                         
573                                         //figure out how many sequences you have to align
574                                         numSeqsPerProcessor = numFastaSeqs / processors;
575                                         int startIndex =  pid * numSeqsPerProcessor;
576                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
577                                         
578                                         
579                                         //align your part
580                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
581                                         
582                                         if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
583
584                                         int done = 0;
585                                         MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
586                                 }
587                                 
588                                 //close files 
589                                 MPI_File_close(&inMPI);
590                                 MPI_File_close(&outMPINewTax);
591                                 MPI_File_close(&outMPITempTax);
592                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
593                                 
594 #else
595                 
596                         vector<unsigned long long> positions; 
597 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
598                         positions = m->divideFile(fastaFileNames[s], processors);
599                         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
600 #else
601                         if (processors == 1) {
602                                 lines.push_back(new linePair(0, 1000));
603                         }else {
604                                 positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); 
605                                 
606                                 //figure out how many sequences you have to process
607                                 int numSeqsPerProcessor = numFastaSeqs / processors;
608                                 for (int i = 0; i < processors; i++) {
609                                         int startIndex =  i * numSeqsPerProcessor;
610                                         if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
611                                         lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
612                                 }
613                         }
614 #endif
615                         if(processors == 1){
616                                 numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
617                         }else{
618                                 numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); 
619                         }
620 #endif
621
622                 m->mothurOutEndLine();
623                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
624                 start = time(NULL);
625
626
627                 #ifdef USE_MPI  
628                         if (pid == 0) {  //this part does not need to be paralellized
629                         
630                                 if(namefile != "") { m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();  MPIReadNamesFile(namefileNames[s]);  m->mothurOut("  Done."); m->mothurOutEndLine(); }
631                 #else
632                         //read namefile
633                         if(namefile != "") {
634                         
635                             m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
636                                 
637                                 nameMap.clear(); //remove old names
638                                 
639                                 ifstream inNames;
640                                 m->openInputFile(namefileNames[s], inNames);
641                                 
642                                 string firstCol, secondCol;
643                                 while(!inNames.eof()) {
644                                         inNames >> firstCol >> secondCol; m->gobble(inNames);
645                                         
646                                         vector<string> temp;
647                                         m->splitAtComma(secondCol, temp);
648                         
649                                         nameMap[firstCol] = temp;  
650                                 }
651                                 inNames.close();
652                                 
653                                 m->mothurOut("  Done."); m->mothurOutEndLine();
654                         }
655                 #endif
656
657                         string group = "";
658                         if (groupfile != "") {  group = groupfileNames[s]; }
659                         
660                         PhyloSummary taxaSum(baseTName, group);
661                         
662                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
663                 
664                         if (namefile == "") {  taxaSum.summarize(tempTaxonomyFile);  }
665                         else {
666                                 ifstream in;
667                                 m->openInputFile(tempTaxonomyFile, in);
668                                 
669                                 //read in users taxonomy file and add sequences to tree
670                                 string name, taxon;
671                                 
672                                 while(!in.eof()){
673                                         in >> name >> taxon; m->gobble(in);
674                                         
675                                         itNames = nameMap.find(name);
676                 
677                                         if (itNames == nameMap.end()) { 
678                                                 m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
679                                         }else{
680                                                 for (int i = 0; i < itNames->second.size(); i++) { 
681                                                         taxaSum.addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
682                                                 }
683                                                 itNames->second.clear();
684                                                 nameMap.erase(itNames->first);
685                                         }
686                                 }
687                                 in.close();
688                         }
689                         m->mothurRemove(tempTaxonomyFile);
690                         
691                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
692                         
693                         //print summary file
694                         ofstream outTaxTree;
695                         m->openOutputFile(taxSummary, outTaxTree);
696                         taxaSum.print(outTaxTree);
697                         outTaxTree.close();
698                         
699                         //output taxonomy with the unclassified bins added
700                         ifstream inTax;
701                         m->openInputFile(newTaxonomyFile, inTax);
702                         
703                         ofstream outTax;
704                         string unclass = newTaxonomyFile + ".unclass.temp";
705                         m->openOutputFile(unclass, outTax);
706                         
707                         //get maxLevel from phylotree so you know how many 'unclassified's to add
708                         int maxLevel = taxaSum.getMaxLevel();
709                                                         
710                         //read taxfile - this reading and rewriting is done to preserve the confidence scores.
711                         string name, taxon;
712                         while (!inTax.eof()) {
713                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } m->mothurRemove(unclass); delete classify; return 0; }
714
715                                 inTax >> name >> taxon; m->gobble(inTax);
716                                 
717                                 string newTax = addUnclassifieds(taxon, maxLevel);
718                                 
719                                 outTax << name << '\t' << newTax << endl;
720                         }
721                         inTax.close();  
722                         outTax.close();
723                         
724                         m->mothurRemove(newTaxonomyFile);
725                         rename(unclass.c_str(), newTaxonomyFile.c_str());
726                         
727                         m->mothurOutEndLine();
728                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
729                         
730                         #ifdef USE_MPI  
731                                 }
732                         #endif
733
734                         m->mothurOutEndLine();
735                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
736                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
737                         m->mothurOutEndLine();
738                 }
739                 
740                 //set taxonomy file as new current taxonomyfile
741                 string current = "";
742                 itTypes = outputTypes.find("taxonomy");
743                 if (itTypes != outputTypes.end()) {
744                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
745                 }
746                 
747                 delete classify;
748                 
749                 return 0;
750         }
751         catch(exception& e) {
752                 m->errorOut(e, "ClassifySeqsCommand", "execute");
753                 exit(1);
754         }
755 }
756
757 /**************************************************************************************************/
758 string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) {
759         try{
760                 string newTax, taxon;
761                 int level = 0;
762                 
763                 //keep what you have counting the levels
764                 while (tax.find_first_of(';') != -1) {
765                         //get taxon
766                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
767                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
768                         newTax += taxon;
769                         level++;
770                 }
771                 
772                 //add "unclassified" until you reach maxLevel
773                 while (level < maxlevel) {
774                         newTax += "unclassified;";
775                         level++;
776                 }
777                 
778                 return newTax;
779         }
780         catch(exception& e) {
781                 m->errorOut(e, "ClassifySeqsCommand", "addUnclassifieds");
782                 exit(1);
783         }
784 }
785
786 /**************************************************************************************************/
787
788 int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
789         try {
790                 
791                 int num = 0;
792                 processIDS.clear();
793                 
794 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
795                 int process = 1;
796                 
797                 //loop through and create all the processes you want
798                 while (process != processors) {
799                         int pid = fork();
800                         
801                         if (pid > 0) {
802                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
803                                 process++;
804                         }else if (pid == 0){
805                                 num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
806
807                                 //pass numSeqs to parent
808                                 ofstream out;
809                                 string tempFile = filename + toString(getpid()) + ".num.temp";
810                                 m->openOutputFile(tempFile, out);
811                                 out << num << endl;
812                                 out.close();
813
814                                 exit(0);
815                         }else { 
816                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
817                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
818                                 exit(0);
819                         }
820                 }
821                 
822                 //parent does its part
823                 num = driver(lines[0], taxFileName, tempTaxFile, filename);
824                 
825                 //force parent to wait until all the processes are done
826                 for (int i=0;i<processIDS.size();i++) { 
827                         int temp = processIDS[i];
828                         wait(&temp);
829                 }
830                 
831                 for (int i = 0; i < processIDS.size(); i++) {
832                         ifstream in;
833                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
834                         m->openInputFile(tempFile, in);
835                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
836                         in.close(); m->mothurRemove(m->getFullPathName(tempFile));
837                 }
838 #else
839                 //////////////////////////////////////////////////////////////////////////////////////////////////////
840                 //Windows version shared memory, so be careful when passing variables through the alignData struct. 
841                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
842                 //////////////////////////////////////////////////////////////////////////////////////////////////////
843                 
844                 vector<classifyData*> pDataArray; 
845                 DWORD   dwThreadIdArray[processors-1];
846                 HANDLE  hThreadArray[processors-1]; 
847                 
848                 //Create processor worker threads.
849                 for( int i=0; i<processors-1; i++ ){
850                         // Allocate memory for thread data.
851                         string extension = "";
852                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
853                         
854                         classifyData* tempclass = new classifyData(probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i);
855                         pDataArray.push_back(tempclass);
856                         
857                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
858                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
859                         hThreadArray[i] = CreateThread(NULL, 0, MyClassThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);  
860                         
861                 }
862                 
863                 //parent does its part
864                 num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", filename);
865                 processIDS.push_back((processors-1));
866                 
867                 //Wait until all threads have terminated.
868                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
869                 
870                 //Close all thread handles and free memory allocations.
871                 for(int i=0; i < pDataArray.size(); i++){
872                         num += pDataArray[i]->count;
873                         CloseHandle(hThreadArray[i]);
874                         delete pDataArray[i];
875                 }
876                 
877         #endif  
878                 
879                 for(int i=0;i<processIDS.size();i++){
880                         appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
881                         appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
882                         m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
883                         m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
884                 }
885                 
886                 return num;
887                 
888         }
889         catch(exception& e) {
890                 m->errorOut(e, "ClassifySeqsCommand", "createProcesses");
891                 exit(1);
892         }
893 }
894 /**************************************************************************************************/
895
896 void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
897         try{
898                 
899                 ofstream output;
900                 ifstream input;
901                 m->openOutputFileAppend(filename, output);
902                 m->openInputFile(temp, input);
903                 
904                 while(char c = input.get()){
905                         if(input.eof())         {       break;                  }
906                         else                            {       output << c;    }
907                 }
908                 
909                 input.close();
910                 output.close();
911         }
912         catch(exception& e) {
913                 m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
914                 exit(1);
915         }
916 }
917
918 //**********************************************************************************************************************
919
920 int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
921         try {
922                 ofstream outTax;
923                 m->openOutputFile(taxFName, outTax);
924                 
925                 ofstream outTaxSimple;
926                 m->openOutputFile(tempTFName, outTaxSimple);
927         
928                 ifstream inFASTA;
929                 m->openInputFile(filename, inFASTA);
930                 
931                 string taxonomy;
932
933                 inFASTA.seekg(filePos->start);
934
935                 bool done = false;
936                 int count = 0;
937                 
938                 while (!done) {
939                         if (m->control_pressed) { return 0; }
940                 
941                         Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
942                         
943                         if (candidateSeq->getName() != "") {
944                         
945                                 taxonomy = classify->getTaxonomy(candidateSeq);
946                                 
947                                 if (m->control_pressed) { delete candidateSeq; return 0; }
948
949                                 if (taxonomy != "bad seq") {
950                                         //output confidence scores or not
951                                         if (probs) {
952                                                 outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
953                                         }else{
954                                                 outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
955                                         }
956                                         
957                                         outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
958                                 }
959                                 count++;
960                         }
961                         delete candidateSeq;
962                         
963                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
964                                 unsigned long long pos = inFASTA.tellg();
965                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
966                         #else
967                                 if (inFASTA.eof()) { break; }
968                         #endif
969                         
970                         //report progress
971                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
972                         
973                 }
974                 //report progress
975                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
976                         
977                 inFASTA.close();
978                 outTax.close();
979                 outTaxSimple.close();
980                 
981                 return count;
982         }
983         catch(exception& e) {
984                 m->errorOut(e, "ClassifySeqsCommand", "driver");
985                 exit(1);
986         }
987 }
988 //**********************************************************************************************************************
989 #ifdef USE_MPI
990 int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector<unsigned long long>& MPIPos){
991         try {
992                 MPI_Status statusNew; 
993                 MPI_Status statusTemp; 
994                 MPI_Status status; 
995                 
996                 int pid;
997                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
998         
999                 string taxonomy;
1000                 string outputString;
1001
1002                 for(int i=0;i<num;i++){
1003                 
1004                         if (m->control_pressed) { return 0; }
1005                 
1006                         //read next sequence
1007                         int length = MPIPos[start+i+1] - MPIPos[start+i];
1008                         char* buf4 = new char[length];
1009                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
1010                         
1011                         string tempBuf = buf4;
1012                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
1013                         istringstream iss (tempBuf,istringstream::in);
1014                         delete buf4;
1015
1016                         Sequence* candidateSeq = new Sequence(iss);
1017                         
1018                         if (candidateSeq->getName() != "") {
1019                                 taxonomy = classify->getTaxonomy(candidateSeq);
1020                                 
1021                                 if (taxonomy != "bad seq") {
1022                                         //output confidence scores or not
1023                                         if (probs) {
1024                                                 outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
1025                                         }else{
1026                                                 outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
1027                                         }
1028                                         
1029                                         int length = outputString.length();
1030                                         char* buf2 = new char[length];
1031                                         memcpy(buf2, outputString.c_str(), length);
1032                                 
1033                                         MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
1034                                         delete buf2;
1035
1036                                         outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
1037                                         length = outputString.length();
1038                                         char* buf = new char[length];
1039                                         memcpy(buf, outputString.c_str(), length);
1040                                 
1041                                         MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
1042                                         delete buf;
1043                                 }
1044                         }                               
1045                         delete candidateSeq;
1046                         
1047                         if((i+1) % 100 == 0){   cout << "Classifying sequence " << (i+1) << endl;       }
1048                 }
1049                 
1050                 if(num % 100 != 0){     cout << "Classifying sequence " << (num) << endl;       }
1051                 
1052                 
1053                 return 1;
1054         }
1055         catch(exception& e) {
1056                 m->errorOut(e, "ClassifySeqsCommand", "driverMPI");
1057                 exit(1);
1058         }
1059 }
1060
1061 //**********************************************************************************************************************
1062 int ClassifySeqsCommand::MPIReadNamesFile(string nameFilename){
1063         try {
1064         
1065                 nameMap.clear(); //remove old names
1066                 
1067                 MPI_File inMPI;
1068                 MPI_Offset size;
1069                 MPI_Status status;
1070
1071                 //char* inFileName = new char[nameFilename.length()];
1072                 //memcpy(inFileName, nameFilename.c_str(), nameFilename.length());
1073                 
1074                 char inFileName[1024];
1075                 strcpy(inFileName, nameFilename.c_str());
1076
1077                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
1078                 MPI_File_get_size(inMPI, &size);
1079                 //delete inFileName;
1080
1081                 char* buffer = new char[size];
1082                 MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
1083
1084                 string tempBuf = buffer;
1085                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
1086                 istringstream iss (tempBuf,istringstream::in);
1087                 delete buffer;
1088                 
1089                 string firstCol, secondCol;
1090                 while(!iss.eof()) {
1091                         iss >> firstCol >> secondCol; m->gobble(iss);
1092                         
1093                         vector<string> temp;
1094                         m->splitAtComma(secondCol, temp);
1095                         
1096                         nameMap[firstCol] = temp;  
1097                 }
1098         
1099                 MPI_File_close(&inMPI);
1100                 
1101                 return 1;
1102         }
1103         catch(exception& e) {
1104                 m->errorOut(e, "ClassifySeqsCommand", "MPIReadNamesFile");
1105                 exit(1);
1106         }
1107 }
1108 #endif
1109 /**************************************************************************************************/