]> git.donarmstrong.com Git - mothur.git/blob - classifyseqscommand.cpp
added multiple processors option for Windows users to align.seqs, dist.seqs, summary...
[mothur.git] / classifyseqscommand.cpp
1 /*
2  *  classifyseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/2/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classifyseqscommand.h"
11 #include "sequence.hpp"
12 #include "bayesian.h"
13 #include "phylotree.h"
14 #include "phylosummary.h"
15 #include "knn.h"
16
17
18
19 //**********************************************************************************************************************
20 vector<string> ClassifySeqsCommand::setParameters(){    
21         try {
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
23                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
26                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
27                 CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance", "kmer", "", "", "",false,false); parameters.push_back(psearch);
28                 CommandParameter pksize("ksize", "Number", "", "8", "", "", "",false,false); parameters.push_back(pksize);
29                 CommandParameter pmethod("method", "Multiple", "bayesian-knn", "bayesian", "", "", "",false,false); parameters.push_back(pmethod);
30                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
31                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
32                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
33                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
34                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
35                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
36                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
37                 CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
38                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
39                 CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                 
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "ClassifySeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string ClassifySeqsCommand::getHelpString(){    
54         try {
55                 string helpString = "";
56                 helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n";
57                 helpString += "The classify.seqs command parameters are reference, fasta, name, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
58                 helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
59                 helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast and distance. The default is kmer.\n";
60                 helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n";
61                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
62                 helpString += "The method parameter allows you to specify classification method to use.  Your options are: bayesian and knn. The default is bayesian.\n";
63                 helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n";
64                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
65 #ifdef USE_MPI
66                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
67 #endif
68                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
69                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
70                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
71                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
72                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
73                 helpString += "The numwanted parameter allows you to specify the number of sequence matches you want with the knn method.  The default is 10.\n";
74                 helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n";
75                 helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
76                 helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n";
77                 helpString += "The classify.seqs command should be in the following format: \n";
78                 helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
79                 helpString += "Example classify.seqs(fasta=amazon.fasta, reference=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n";
80                 helpString += "The .taxonomy file consists of 2 columns: 1 = your sequence name, 2 = the taxonomy for your sequence. \n";
81                 helpString += "The .tax.summary is a summary of the different taxonomies represented in your fasta file. \n";
82                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
83                 return helpString;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "ClassifySeqsCommand", "getHelpString");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 ClassifySeqsCommand::ClassifySeqsCommand(){     
92         try {
93                 abort = true; calledHelp = true; 
94                 setParameters();
95                 vector<string> tempOutNames;
96                 outputTypes["taxonomy"] = tempOutNames;
97                 outputTypes["taxsummary"] = tempOutNames;
98                 outputTypes["matchdist"] = tempOutNames;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
107         try {
108                 abort = false; calledHelp = false;   
109                 rdb = ReferenceDB::getInstance();
110                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();
117                         
118                         OptionParser parser(option);
119                         map<string, string> parameters = parser.getParameters(); 
120                         
121                         ValidParameters validParameter("classify.seqs");
122                         map<string, string>::iterator it;
123                         
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["taxonomy"] = tempOutNames;
132                         outputTypes["taxsummary"] = tempOutNames;
133                         outputTypes["matchdist"] = tempOutNames;
134                         
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter 
139                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142                                 string path;
143                                 it = parameters.find("reference");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
149                                 }
150                                 
151                                 it = parameters.find("taxonomy");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
157                                 }
158                                 
159                                 it = parameters.find("group");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
165                                 }
166                         }
167
168                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
169                         if (fastaFileName == "not found") {                             
170                                 //if there is a current fasta file, use it
171                                 string filename = m->getFastaFile(); 
172                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
173                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
174                         }
175                         else { 
176                                 m->splitAtDash(fastaFileName, fastaFileNames);
177                                 
178                                 //go through files and make sure they are good, if not, then disregard them
179                                 for (int i = 0; i < fastaFileNames.size(); i++) {
180                                         
181                                         bool ignore = false;
182                                         if (fastaFileNames[i] == "current") { 
183                                                 fastaFileNames[i] = m->getFastaFile(); 
184                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
185                                                 else {  
186                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
187                                                         //erase from file list
188                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
189                                                         i--;
190                                                 }
191                                         }
192                                         
193                                         if (!ignore) {
194                                                 
195                                                 if (inputDir != "") {
196                                                         string path = m->hasPath(fastaFileNames[i]);
197                                                         //if the user has not given a path then, add inputdir. else leave path alone.
198                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
199                                                 }
200                                                 
201                                                 int ableToOpen;
202                                                 
203                                                 ifstream in;
204                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
205                                         
206                                                 //if you can't open it, try default location
207                                                 if (ableToOpen == 1) {
208                                                         if (m->getDefaultPath() != "") { //default path is set
209                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
210                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
211                                                                 ifstream in2;
212                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
213                                                                 in2.close();
214                                                                 fastaFileNames[i] = tryPath;
215                                                         }
216                                                 }
217                                                 
218                                                 if (ableToOpen == 1) {
219                                                         if (m->getOutputDir() != "") { //default path is set
220                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
221                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
222                                                                 ifstream in2;
223                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
224                                                                 in2.close();
225                                                                 fastaFileNames[i] = tryPath;
226                                                         }
227                                                 }
228                                                 
229                                                 in.close();
230                                                 
231                                                 if (ableToOpen == 1) { 
232                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
233                                                         //erase from file list
234                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
235                                                         i--;
236                                                 }else {
237                                                         m->setFastaFile(fastaFileNames[i]);
238                                                 }
239                                         }
240                                         
241                                 }
242                                 
243                                 //make sure there is at least one valid file left
244                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
245                         }
246
247                         namefile = validParameter.validFile(parameters, "name", false);
248                         if (namefile == "not found") { namefile = "";  }
249
250                         else { 
251                                 m->splitAtDash(namefile, namefileNames);
252                                 
253                                 //go through files and make sure they are good, if not, then disregard them
254                                 for (int i = 0; i < namefileNames.size(); i++) {
255                                         bool ignore = false;
256                                         if (namefileNames[i] == "current") { 
257                                                 namefileNames[i] = m->getNameFile(); 
258                                                 if (namefileNames[i] != "") {  m->mothurOut("Using " + namefileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
259                                                 else {  
260                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
261                                                         //erase from file list
262                                                         namefileNames.erase(namefileNames.begin()+i);
263                                                         i--;
264                                                 }
265                                         }
266                                         
267                                         if (!ignore) {
268                                                 
269                                                 if (inputDir != "") {
270                                                         string path = m->hasPath(namefileNames[i]);
271                                                         //if the user has not given a path then, add inputdir. else leave path alone.
272                                                         if (path == "") {       namefileNames[i] = inputDir + namefileNames[i];         }
273                                                 }
274                                                 int ableToOpen;
275                                                 
276                                                 ifstream in;
277                                                 ableToOpen = m->openInputFile(namefileNames[i], in, "noerror");
278                                         
279                                                 //if you can't open it, try default location
280                                                 if (ableToOpen == 1) {
281                                                         if (m->getDefaultPath() != "") { //default path is set
282                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(namefileNames[i]);
283                                                                 m->mothurOut("Unable to open " + namefileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
284                                                                 ifstream in2;
285                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
286                                                                 in2.close();
287                                                                 namefileNames[i] = tryPath;
288                                                         }
289                                                 }
290                                                 
291                                                 if (ableToOpen == 1) {
292                                                         if (m->getOutputDir() != "") { //default path is set
293                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(namefileNames[i]);
294                                                                 m->mothurOut("Unable to open " + namefileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
295                                                                 ifstream in2;
296                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
297                                                                 in2.close();
298                                                                 namefileNames[i] = tryPath;
299                                                         }
300                                                 }
301                                                 in.close();
302                                                 
303                                                 if (ableToOpen == 1) { 
304                                                         m->mothurOut("Unable to open " + namefileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();  abort = true;
305                                                         //erase from file list
306                                                         namefileNames.erase(namefileNames.begin()+i);
307                                                         i--;
308                                                 }else {
309                                                         m->setNameFile(namefileNames[i]);
310                                                 }
311                                         }
312                                 }
313                         }
314
315                         if (namefile != "") {
316                                 if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); }
317                         }
318                         
319                         groupfile = validParameter.validFile(parameters, "group", false);
320                         if (groupfile == "not found") { groupfile = "";  }
321                         else { 
322                                 m->splitAtDash(groupfile, groupfileNames);
323                                 
324                                 //go through files and make sure they are good, if not, then disregard them
325                                 for (int i = 0; i < groupfileNames.size(); i++) {
326                                         if (inputDir != "") {
327                                                 string path = m->hasPath(groupfileNames[i]);
328                                                 //if the user has not given a path then, add inputdir. else leave path alone.
329                                                 if (path == "") {       groupfileNames[i] = inputDir + groupfileNames[i];               }
330                                         }
331                                         int ableToOpen;
332                                         
333                                         ifstream in;
334                                         ableToOpen = m->openInputFile(groupfileNames[i], in, "noerror");
335                                 
336                                         //if you can't open it, try default location
337                                         if (ableToOpen == 1) {
338                                                 if (m->getDefaultPath() != "") { //default path is set
339                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(groupfileNames[i]);
340                                                         m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
341                                                         ifstream in2;
342                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
343                                                         in2.close();
344                                                         groupfileNames[i] = tryPath;
345                                                 }
346                                         }
347                                         
348                                         if (ableToOpen == 1) {
349                                                 if (m->getOutputDir() != "") { //default path is set
350                                                         string tryPath = m->getOutputDir() + m->getSimpleName(groupfileNames[i]);
351                                                         m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
352                                                         ifstream in2;
353                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
354                                                         in2.close();
355                                                         groupfileNames[i] = tryPath;
356                                                 }
357                                         }
358                                         
359                                         in.close();
360                                         
361                                         if (ableToOpen == 1) { 
362                                                 m->mothurOut("Unable to open " + groupfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); groupfileNames[i] = "";
363                                                 //erase from file list
364                                                 groupfileNames.erase(groupfileNames.begin()+i);
365                                                 i--;
366                                         }else {
367                                                 m->setGroupFile(groupfileNames[i]);
368                                         }
369                                 }
370                         }
371
372                         if (groupfile != "") {
373                                 if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); }
374                         }else {
375                                 for (int i = 0; i < fastaFileNames.size(); i++) {  groupfileNames.push_back("");  }
376                         }
377                         
378                         //check for optional parameter and set defaults
379                         // ...at some point should added some additional type checking...
380                         string temp;
381                         temp = validParameter.validFile(parameters, "ksize", false);            if (temp == "not found"){       temp = "8";                             }
382                         convert(temp, kmerSize); 
383                         
384                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
385                         save = m->isTrue(temp); 
386                         rdb->save = save; 
387                         if (save) { //clear out old references
388                                 rdb->clearMemory();     
389                         }
390                         
391                         //this has to go after save so that if the user sets save=t and provides no reference we abort
392                         templateFileName = validParameter.validFile(parameters, "reference", true);
393                         if (templateFileName == "not found") { 
394                                 //check for saved reference sequences
395                                 if (rdb->referenceSeqs.size() != 0) {
396                                         templateFileName = "saved";
397                                 }else {
398                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the classify.seqs command."); 
399                                         m->mothurOutEndLine();
400                                         abort = true; 
401                                 }
402                         }else if (templateFileName == "not open") { abort = true; }     
403                         else {  if (save) {     rdb->setSavedReference(templateFileName);       }       }
404                         
405                         //this has to go after save so that if the user sets save=t and provides no reference we abort
406                         taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
407                         if (taxonomyFileName == "not found") { 
408                                 //check for saved reference sequences
409                                 if (rdb->wordGenusProb.size() != 0) {
410                                         taxonomyFileName = "saved";
411                                 }else {
412                                         m->mothurOut("[ERROR]: You don't have any saved taxonomy information and the taxonomy parameter is a required for the classify.seqs command."); 
413                                         m->mothurOutEndLine();
414                                         abort = true; 
415                                 }
416                         }else if (taxonomyFileName == "not open") { abort = true; }     
417                         else {  if (save) {     rdb->setSavedTaxonomy(taxonomyFileName);        }       }
418                         
419                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
420                         m->setProcessors(temp);
421                         convert(temp, processors); 
422                         
423                         search = validParameter.validFile(parameters, "search", false);         if (search == "not found"){     search = "kmer";                }
424                         
425                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "bayesian";    }
426                         
427                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
428                         convert(temp, match);  
429                         
430                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
431                         convert(temp, misMatch);  
432                         
433                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
434                         convert(temp, gapOpen);  
435                         
436                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
437                         convert(temp, gapExtend); 
438                         
439                         temp = validParameter.validFile(parameters, "numwanted", false);        if (temp == "not found"){       temp = "10";                    }
440                         convert(temp, numWanted);
441                         
442                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found"){       temp = "0";                             }
443                         convert(temp, cutoff);
444                         
445                         temp = validParameter.validFile(parameters, "probs", false);            if (temp == "not found"){       temp = "true";                  }
446                         probs = m->isTrue(temp);
447                         
448                         temp = validParameter.validFile(parameters, "iters", false);            if (temp == "not found") { temp = "100";                        }
449                         convert(temp, iters); 
450
451
452                         
453                         if ((method == "bayesian") && (search != "kmer"))  { 
454                                 m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
455                                 search = "kmer";
456                         }
457                 }
458                 
459         }
460         catch(exception& e) {
461                 m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
462                 exit(1);
463         }
464 }
465
466 //**********************************************************************************************************************
467 ClassifySeqsCommand::~ClassifySeqsCommand(){    
468         if (abort == false) {
469                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
470         }
471 }
472 //**********************************************************************************************************************
473
474 int ClassifySeqsCommand::execute(){
475         try {
476                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
477                 
478                 if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());           }
479                 else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
480                 else {
481                         m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
482                         m->mothurOutEndLine();
483                         classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());   
484                 }
485                 
486                 if (m->control_pressed) { delete classify; return 0; }
487                 
488                                 
489                 for (int s = 0; s < fastaFileNames.size(); s++) {
490                 
491                         m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
492                         
493                         string baseTName = taxonomyFileName;
494                         if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy();  }
495                         
496                         string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
497                         RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
498                         if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
499                         RippedTaxName +=  "."; 
500                 
501                         if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
502                         string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
503                         string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
504                         string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
505                         
506                         if ((method == "knn") && (search == "distance")) { 
507                                 string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "match.dist";
508                                 classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
509                         }
510                         
511                         outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
512                         outputNames.push_back(taxSummary);      outputTypes["taxsummary"].push_back(taxSummary);
513                         
514                         int start = time(NULL);
515                         int numFastaSeqs = 0;
516                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
517                         
518 #ifdef USE_MPI  
519                                 int pid, numSeqsPerProcessor; 
520                                 int tag = 2001;
521                                 vector<unsigned long int> MPIPos;
522                                 
523                                 MPI_Status status; 
524                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
525                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
526
527                                 MPI_File inMPI;
528                                 MPI_File outMPINewTax;
529                                 MPI_File outMPITempTax;
530                                                         
531                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
532                                 int inMode=MPI_MODE_RDONLY; 
533                                 
534                                 char outNewTax[1024];
535                                 strcpy(outNewTax, newTaxonomyFile.c_str());
536                                 
537                                 char outTempTax[1024];
538                                 strcpy(outTempTax, tempTaxonomyFile.c_str());
539                                 
540                                 char inFileName[1024];
541                                 strcpy(inFileName, fastaFileNames[s].c_str());
542
543                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
544                                 MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax);
545                                 MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax);
546                                 
547                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
548                                 
549                                 if (pid == 0) { //you are the root process 
550                                         
551                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
552                                         
553                                         //send file positions to all processes
554                                         for(int i = 1; i < processors; i++) { 
555                                                 MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
556                                                 MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
557                                         }
558                                         
559                                         //figure out how many sequences you have to align
560                                         numSeqsPerProcessor = numFastaSeqs / processors;
561                                         int startIndex =  pid * numSeqsPerProcessor;
562                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
563                                         
564                                 
565                                         //align your part
566                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
567                                         
568                                         if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
569                                         
570                                         for (int i = 1; i < processors; i++) {
571                                                 int done;
572                                                 MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
573                                         }
574                                 }else{ //you are a child process
575                                         MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
576                                         MPIPos.resize(numFastaSeqs+1);
577                                         MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
578                                         
579                                         //figure out how many sequences you have to align
580                                         numSeqsPerProcessor = numFastaSeqs / processors;
581                                         int startIndex =  pid * numSeqsPerProcessor;
582                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
583                                         
584                                         
585                                         //align your part
586                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
587                                         
588                                         if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
589
590                                         int done = 0;
591                                         MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
592                                 }
593                                 
594                                 //close files 
595                                 MPI_File_close(&inMPI);
596                                 MPI_File_close(&outMPINewTax);
597                                 MPI_File_close(&outMPITempTax);
598                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
599                                 
600 #else
601                 
602                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
603                                 
604                         for (int i = 0; i < (positions.size()-1); i++) {
605                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
606                         }       
607                         
608                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
609                         if(processors == 1){
610                                 numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
611                         }
612                         else{
613                                 processIDS.resize(0);
614                                 
615                                 numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); 
616                                 
617                         }
618         #else
619                         numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
620         #endif  
621 #endif
622
623                 m->mothurOutEndLine();
624                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
625                 start = time(NULL);
626
627
628                 #ifdef USE_MPI  
629                         if (pid == 0) {  //this part does not need to be paralellized
630                         
631                                 if(namefile != "") { m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();  MPIReadNamesFile(namefileNames[s]);  m->mothurOut("  Done."); m->mothurOutEndLine(); }
632                 #else
633                         //read namefile
634                         if(namefile != "") {
635                         
636                             m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
637                                 
638                                 nameMap.clear(); //remove old names
639                                 
640                                 ifstream inNames;
641                                 m->openInputFile(namefileNames[s], inNames);
642                                 
643                                 string firstCol, secondCol;
644                                 while(!inNames.eof()) {
645                                         inNames >> firstCol >> secondCol; m->gobble(inNames);
646                                         
647                                         vector<string> temp;
648                                         m->splitAtComma(secondCol, temp);
649                         
650                                         nameMap[firstCol] = temp;  
651                                 }
652                                 inNames.close();
653                                 
654                                 m->mothurOut("  Done."); m->mothurOutEndLine();
655                         }
656                 #endif
657
658                         string group = "";
659                         if (groupfile != "") {  group = groupfileNames[s]; }
660                         
661                         PhyloSummary taxaSum(baseTName, group);
662                         
663                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
664                 
665                         if (namefile == "") {  taxaSum.summarize(tempTaxonomyFile);  }
666                         else {
667                                 ifstream in;
668                                 m->openInputFile(tempTaxonomyFile, in);
669                                 
670                                 //read in users taxonomy file and add sequences to tree
671                                 string name, taxon;
672                                 
673                                 while(!in.eof()){
674                                         in >> name >> taxon; m->gobble(in);
675                                         
676                                         itNames = nameMap.find(name);
677                 
678                                         if (itNames == nameMap.end()) { 
679                                                 m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
680                                         }else{
681                                                 for (int i = 0; i < itNames->second.size(); i++) { 
682                                                         taxaSum.addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
683                                                 }
684                                                 itNames->second.clear();
685                                                 nameMap.erase(itNames->first);
686                                         }
687                                 }
688                                 in.close();
689                         }
690                         m->mothurRemove(tempTaxonomyFile);
691                         
692                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
693                         
694                         //print summary file
695                         ofstream outTaxTree;
696                         m->openOutputFile(taxSummary, outTaxTree);
697                         taxaSum.print(outTaxTree);
698                         outTaxTree.close();
699                         
700                         //output taxonomy with the unclassified bins added
701                         ifstream inTax;
702                         m->openInputFile(newTaxonomyFile, inTax);
703                         
704                         ofstream outTax;
705                         string unclass = newTaxonomyFile + ".unclass.temp";
706                         m->openOutputFile(unclass, outTax);
707                         
708                         //get maxLevel from phylotree so you know how many 'unclassified's to add
709                         int maxLevel = taxaSum.getMaxLevel();
710                                                         
711                         //read taxfile - this reading and rewriting is done to preserve the confidence scores.
712                         string name, taxon;
713                         while (!inTax.eof()) {
714                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } m->mothurRemove(unclass); delete classify; return 0; }
715
716                                 inTax >> name >> taxon; m->gobble(inTax);
717                                 
718                                 string newTax = addUnclassifieds(taxon, maxLevel);
719                                 
720                                 outTax << name << '\t' << newTax << endl;
721                         }
722                         inTax.close();  
723                         outTax.close();
724                         
725                         m->mothurRemove(newTaxonomyFile);
726                         rename(unclass.c_str(), newTaxonomyFile.c_str());
727                         
728                         m->mothurOutEndLine();
729                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
730                         
731                         #ifdef USE_MPI  
732                                 }
733                         #endif
734
735                         m->mothurOutEndLine();
736                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
737                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
738                         m->mothurOutEndLine();
739                 }
740                 
741                 //set taxonomy file as new current taxonomyfile
742                 string current = "";
743                 itTypes = outputTypes.find("taxonomy");
744                 if (itTypes != outputTypes.end()) {
745                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
746                 }
747                 
748                 delete classify;
749                 return 0;
750         }
751         catch(exception& e) {
752                 m->errorOut(e, "ClassifySeqsCommand", "execute");
753                 exit(1);
754         }
755 }
756
757 /**************************************************************************************************/
758 string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) {
759         try{
760                 string newTax, taxon;
761                 int level = 0;
762                 
763                 //keep what you have counting the levels
764                 while (tax.find_first_of(';') != -1) {
765                         //get taxon
766                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
767                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
768                         newTax += taxon;
769                         level++;
770                 }
771                 
772                 //add "unclassified" until you reach maxLevel
773                 while (level < maxlevel) {
774                         newTax += "unclassified;";
775                         level++;
776                 }
777                 
778                 return newTax;
779         }
780         catch(exception& e) {
781                 m->errorOut(e, "ClassifySeqsCommand", "addUnclassifieds");
782                 exit(1);
783         }
784 }
785
786 /**************************************************************************************************/
787
788 int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
789         try {
790 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
791                 int process = 1;
792                 int num = 0;
793                 
794                 //loop through and create all the processes you want
795                 while (process != processors) {
796                         int pid = fork();
797                         
798                         if (pid > 0) {
799                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
800                                 process++;
801                         }else if (pid == 0){
802                                 num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
803
804                                 //pass numSeqs to parent
805                                 ofstream out;
806                                 string tempFile = filename + toString(getpid()) + ".num.temp";
807                                 m->openOutputFile(tempFile, out);
808                                 out << num << endl;
809                                 out.close();
810
811                                 exit(0);
812                         }else { 
813                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
814                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
815                                 exit(0);
816                         }
817                 }
818                 
819                 //parent does its part
820                 num = driver(lines[0], taxFileName, tempTaxFile, filename);
821                 
822                 //force parent to wait until all the processes are done
823                 for (int i=0;i<processIDS.size();i++) { 
824                         int temp = processIDS[i];
825                         wait(&temp);
826                 }
827                 
828                 for (int i = 0; i < processIDS.size(); i++) {
829                         ifstream in;
830                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
831                         m->openInputFile(tempFile, in);
832                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
833                         in.close(); m->mothurRemove(m->getFullPathName(tempFile));
834                 }
835                 
836                 for(int i=0;i<processIDS.size();i++){
837                         appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
838                         appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
839                         m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
840                         m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
841                 }
842                 
843                 return num;
844 #endif          
845         }
846         catch(exception& e) {
847                 m->errorOut(e, "ClassifySeqsCommand", "createProcesses");
848                 exit(1);
849         }
850 }
851 /**************************************************************************************************/
852
853 void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
854         try{
855                 
856                 ofstream output;
857                 ifstream input;
858                 m->openOutputFileAppend(filename, output);
859                 m->openInputFile(temp, input);
860                 
861                 while(char c = input.get()){
862                         if(input.eof())         {       break;                  }
863                         else                            {       output << c;    }
864                 }
865                 
866                 input.close();
867                 output.close();
868         }
869         catch(exception& e) {
870                 m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
871                 exit(1);
872         }
873 }
874
875 //**********************************************************************************************************************
876
877 int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
878         try {
879                 ofstream outTax;
880                 m->openOutputFile(taxFName, outTax);
881                 
882                 ofstream outTaxSimple;
883                 m->openOutputFile(tempTFName, outTaxSimple);
884         
885                 ifstream inFASTA;
886                 m->openInputFile(filename, inFASTA);
887                 
888                 string taxonomy;
889
890                 inFASTA.seekg(filePos->start);
891
892                 bool done = false;
893                 int count = 0;
894                 
895                 while (!done) {
896                         if (m->control_pressed) { return 0; }
897                 
898                         Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
899                         
900                         if (candidateSeq->getName() != "") {
901                         
902                                 taxonomy = classify->getTaxonomy(candidateSeq);
903                                 
904                                 if (m->control_pressed) { delete candidateSeq; return 0; }
905
906                                 if (taxonomy != "bad seq") {
907                                         //output confidence scores or not
908                                         if (probs) {
909                                                 outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
910                                         }else{
911                                                 outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
912                                         }
913                                         
914                                         outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
915                                 }
916                                 count++;
917                         }
918                         delete candidateSeq;
919                         
920                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
921                                 unsigned long int pos = inFASTA.tellg();
922                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
923                         #else
924                                 if (inFASTA.eof()) { break; }
925                         #endif
926                         
927                         //report progress
928                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
929                         
930                 }
931                 //report progress
932                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
933                         
934                 inFASTA.close();
935                 outTax.close();
936                 outTaxSimple.close();
937                 
938                 return count;
939         }
940         catch(exception& e) {
941                 m->errorOut(e, "ClassifySeqsCommand", "driver");
942                 exit(1);
943         }
944 }
945 //**********************************************************************************************************************
946 #ifdef USE_MPI
947 int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector<unsigned long int>& MPIPos){
948         try {
949                 MPI_Status statusNew; 
950                 MPI_Status statusTemp; 
951                 MPI_Status status; 
952                 
953                 int pid;
954                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
955         
956                 string taxonomy;
957                 string outputString;
958
959                 for(int i=0;i<num;i++){
960                 
961                         if (m->control_pressed) { return 0; }
962                 
963                         //read next sequence
964                         int length = MPIPos[start+i+1] - MPIPos[start+i];
965                         char* buf4 = new char[length];
966                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
967                         
968                         string tempBuf = buf4;
969                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
970                         istringstream iss (tempBuf,istringstream::in);
971                         delete buf4;
972
973                         Sequence* candidateSeq = new Sequence(iss);
974                         
975                         if (candidateSeq->getName() != "") {
976                                 taxonomy = classify->getTaxonomy(candidateSeq);
977                                 
978                                 if (taxonomy != "bad seq") {
979                                         //output confidence scores or not
980                                         if (probs) {
981                                                 outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
982                                         }else{
983                                                 outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
984                                         }
985                                         
986                                         int length = outputString.length();
987                                         char* buf2 = new char[length];
988                                         memcpy(buf2, outputString.c_str(), length);
989                                 
990                                         MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
991                                         delete buf2;
992
993                                         outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
994                                         length = outputString.length();
995                                         char* buf = new char[length];
996                                         memcpy(buf, outputString.c_str(), length);
997                                 
998                                         MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
999                                         delete buf;
1000                                 }
1001                         }                               
1002                         delete candidateSeq;
1003                         
1004                         if((i+1) % 100 == 0){   cout << "Classifying sequence " << (i+1) << endl;       }
1005                 }
1006                 
1007                 if(num % 100 != 0){     cout << "Classifying sequence " << (num) << endl;       }
1008                 
1009                 
1010                 return 1;
1011         }
1012         catch(exception& e) {
1013                 m->errorOut(e, "ClassifySeqsCommand", "driverMPI");
1014                 exit(1);
1015         }
1016 }
1017
1018 //**********************************************************************************************************************
1019 int ClassifySeqsCommand::MPIReadNamesFile(string nameFilename){
1020         try {
1021         
1022                 nameMap.clear(); //remove old names
1023                 
1024                 MPI_File inMPI;
1025                 MPI_Offset size;
1026                 MPI_Status status;
1027
1028                 //char* inFileName = new char[nameFilename.length()];
1029                 //memcpy(inFileName, nameFilename.c_str(), nameFilename.length());
1030                 
1031                 char inFileName[1024];
1032                 strcpy(inFileName, nameFilename.c_str());
1033
1034                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
1035                 MPI_File_get_size(inMPI, &size);
1036                 //delete inFileName;
1037
1038                 char* buffer = new char[size];
1039                 MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
1040
1041                 string tempBuf = buffer;
1042                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
1043                 istringstream iss (tempBuf,istringstream::in);
1044                 delete buffer;
1045                 
1046                 string firstCol, secondCol;
1047                 while(!iss.eof()) {
1048                         iss >> firstCol >> secondCol; m->gobble(iss);
1049                         
1050                         vector<string> temp;
1051                         m->splitAtComma(secondCol, temp);
1052                         
1053                         nameMap[firstCol] = temp;  
1054                 }
1055         
1056                 MPI_File_close(&inMPI);
1057                 
1058                 return 1;
1059         }
1060         catch(exception& e) {
1061                 m->errorOut(e, "ClassifySeqsCommand", "MPIReadNamesFile");
1062                 exit(1);
1063         }
1064 }
1065 #endif
1066 /**************************************************************************************************/