classifyseqscommand.cpp

   1 /*
   2  *  classifyseqscommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 11/2/09.
   6  *  Copyright 2009 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "classifyseqscommand.h"
  11
  12
  13
  14 //**********************************************************************************************************************
  15 vector<string> ClassifySeqsCommand::setParameters(){
  16         try {
  17                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
  18                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
  19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
  20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
  21                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
  22                 CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance", "kmer", "", "", "",false,false); parameters.push_back(psearch);
  23                 CommandParameter pksize("ksize", "Number", "", "8", "", "", "",false,false); parameters.push_back(pksize);
  24                 CommandParameter pmethod("method", "Multiple", "bayesian-knn", "bayesian", "", "", "",false,false); parameters.push_back(pmethod);
  25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
  26                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
  27                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
  28                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
  29                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
  30                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
  31                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
  32                 CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
  33                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
  34                 CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted);
  35                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
  36                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
  37
  38                 vector<string> myArray;
  39                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
  40                 return myArray;
  41         }
  42         catch(exception& e) {
  43                 m->errorOut(e, "ClassifySeqsCommand", "setParameters");
  44                 exit(1);
  45         }
  46 }
  47 //**********************************************************************************************************************
  48 string ClassifySeqsCommand::getHelpString(){
  49         try {
  50                 string helpString = "";
  51                 helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n";
  52                 helpString += "The classify.seqs command parameters are reference, fasta, name, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
  53                 helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
  54                 helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast and distance. The default is kmer.\n";
  55                 helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n";
  56                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
  57                 helpString += "The method parameter allows you to specify classification method to use.  Your options are: bayesian and knn. The default is bayesian.\n";
  58                 helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n";
  59                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
  60 #ifdef USE_MPI
  61                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
  62 #endif
  63                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
  64                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
  65                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
  66                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
  67                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
  68                 helpString += "The numwanted parameter allows you to specify the number of sequence matches you want with the knn method.  The default is 10.\n";
  69                 helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n";
  70                 helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
  71                 helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n";
  72                 helpString += "The classify.seqs command should be in the following format: \n";
  73                 helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
  74                 helpString += "Example classify.seqs(fasta=amazon.fasta, reference=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n";
  75                 helpString += "The .taxonomy file consists of 2 columns: 1 = your sequence name, 2 = the taxonomy for your sequence. \n";
  76                 helpString += "The .tax.summary is a summary of the different taxonomies represented in your fasta file. \n";
  77                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
  78                 return helpString;
  79         }
  80         catch(exception& e) {
  81                 m->errorOut(e, "ClassifySeqsCommand", "getHelpString");
  82                 exit(1);
  83         }
  84 }
  85 //**********************************************************************************************************************
  86 ClassifySeqsCommand::ClassifySeqsCommand(){
  87         try {
  88                 abort = true; calledHelp = true;
  89                 setParameters();
  90                 vector<string> tempOutNames;
  91                 outputTypes["taxonomy"] = tempOutNames;
  92                 outputTypes["taxsummary"] = tempOutNames;
  93                 outputTypes["matchdist"] = tempOutNames;
  94         }
  95         catch(exception& e) {
  96                 m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
  97                 exit(1);
  98         }
  99 }
 100 //**********************************************************************************************************************
 101 ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
 102         try {
 103                 abort = false; calledHelp = false;
 104                 rdb = ReferenceDB::getInstance();
 105
 106                 //allow user to run help
 107                 if(option == "help") { help(); abort = true; calledHelp = true; }
 108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
 109
 110                 else {
 111                         vector<string> myArray = setParameters();
 112
 113                         OptionParser parser(option);
 114                         map<string, string> parameters = parser.getParameters();
 115
 116                         ValidParameters validParameter("classify.seqs");
 117                         map<string, string>::iterator it;
 118
 119                         //check to make sure all parameters are valid for command
 120                         for (it = parameters.begin(); it != parameters.end(); it++) {
 121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
 122                         }
 123
 124                         //initialize outputTypes
 125                         vector<string> tempOutNames;
 126                         outputTypes["taxonomy"] = tempOutNames;
 127                         outputTypes["taxsummary"] = tempOutNames;
 128                         outputTypes["matchdist"] = tempOutNames;
 129
 130                         //if the user changes the output directory command factory will send this info to us in the output parameter
 131                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 132
 133                         //if the user changes the input directory command factory will send this info to us in the output parameter
 134                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 135                         if (inputDir == "not found"){   inputDir = "";          }
 136                         else {
 137                                 string path;
 138                                 it = parameters.find("reference");
 139                                 //user has given a template file
 140                                 if(it != parameters.end()){
 141                                         path = m->hasPath(it->second);
 142                                         //if the user has not given a path then, add inputdir. else leave path alone.
 143                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
 144                                 }
 145
 146                                 it = parameters.find("taxonomy");
 147                                 //user has given a template file
 148                                 if(it != parameters.end()){
 149                                         path = m->hasPath(it->second);
 150                                         //if the user has not given a path then, add inputdir. else leave path alone.
 151                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 152                                 }
 153
 154                                 it = parameters.find("group");
 155                                 //user has given a template file
 156                                 if(it != parameters.end()){
 157                                         path = m->hasPath(it->second);
 158                                         //if the user has not given a path then, add inputdir. else leave path alone.
 159                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
 160                                 }
 161                         }
 162
 163                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
 164                         if (fastaFileName == "not found") {
 165                                 //if there is a current fasta file, use it
 166                                 string filename = m->getFastaFile();
 167                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
 168                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
 169                         }
 170                         else {
 171                                 m->splitAtDash(fastaFileName, fastaFileNames);
 172
 173                                 //go through files and make sure they are good, if not, then disregard them
 174                                 for (int i = 0; i < fastaFileNames.size(); i++) {
 175
 176                                         bool ignore = false;
 177                                         if (fastaFileNames[i] == "current") {
 178                                                 fastaFileNames[i] = m->getFastaFile();
 179                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
 180                                                 else {
 181                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
 182                                                         //erase from file list
 183                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
 184                                                         i--;
 185                                                 }
 186                                         }
 187
 188                                         if (!ignore) {
 189
 190                                                 if (inputDir != "") {
 191                                                         string path = m->hasPath(fastaFileNames[i]);
 192                                                         //if the user has not given a path then, add inputdir. else leave path alone.
 193                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
 194                                                 }
 195
 196                                                 int ableToOpen;
 197
 198                                                 ifstream in;
 199                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
 200
 201                                                 //if you can't open it, try default location
 202                                                 if (ableToOpen == 1) {
 203                                                         if (m->getDefaultPath() != "") { //default path is set
 204                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
 205                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
 206                                                                 ifstream in2;
 207                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
 208                                                                 in2.close();
 209                                                                 fastaFileNames[i] = tryPath;
 210                                                         }
 211                                                 }
 212
 213                                                 if (ableToOpen == 1) {
 214                                                         if (m->getOutputDir() != "") { //default path is set
 215                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
 216                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
 217                                                                 ifstream in2;
 218                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
 219                                                                 in2.close();
 220                                                                 fastaFileNames[i] = tryPath;
 221                                                         }
 222                                                 }
 223
 224                                                 in.close();
 225
 226                                                 if (ableToOpen == 1) {
 227                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
 228                                                         //erase from file list
 229                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
 230                                                         i--;
 231                                                 }else {
 232                                                         m->setFastaFile(fastaFileNames[i]);
 233                                                 }
 234                                         }
 235
 236                                 }
 237
 238                                 //make sure there is at least one valid file left
 239                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
 240                         }
 241
 242                         namefile = validParameter.validFile(parameters, "name", false);
 243                         if (namefile == "not found") { namefile = "";  }
 244
 245                         else {
 246                                 m->splitAtDash(namefile, namefileNames);
 247
 248                                 //go through files and make sure they are good, if not, then disregard them
 249                                 for (int i = 0; i < namefileNames.size(); i++) {
 250                                         bool ignore = false;
 251                                         if (namefileNames[i] == "current") {
 252                                                 namefileNames[i] = m->getNameFile();
 253                                                 if (namefileNames[i] != "") {  m->mothurOut("Using " + namefileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
 254                                                 else {
 255                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
 256                                                         //erase from file list
 257                                                         namefileNames.erase(namefileNames.begin()+i);
 258                                                         i--;
 259                                                 }
 260                                         }
 261
 262                                         if (!ignore) {
 263
 264                                                 if (inputDir != "") {
 265                                                         string path = m->hasPath(namefileNames[i]);
 266                                                         //if the user has not given a path then, add inputdir. else leave path alone.
 267                                                         if (path == "") {       namefileNames[i] = inputDir + namefileNames[i];         }
 268                                                 }
 269                                                 int ableToOpen;
 270
 271                                                 ifstream in;
 272                                                 ableToOpen = m->openInputFile(namefileNames[i], in, "noerror");
 273
 274                                                 //if you can't open it, try default location
 275                                                 if (ableToOpen == 1) {
 276                                                         if (m->getDefaultPath() != "") { //default path is set
 277                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(namefileNames[i]);
 278                                                                 m->mothurOut("Unable to open " + namefileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
 279                                                                 ifstream in2;
 280                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
 281                                                                 in2.close();
 282                                                                 namefileNames[i] = tryPath;
 283                                                         }
 284                                                 }
 285
 286                                                 if (ableToOpen == 1) {
 287                                                         if (m->getOutputDir() != "") { //default path is set
 288                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(namefileNames[i]);
 289                                                                 m->mothurOut("Unable to open " + namefileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
 290                                                                 ifstream in2;
 291                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
 292                                                                 in2.close();
 293                                                                 namefileNames[i] = tryPath;
 294                                                         }
 295                                                 }
 296                                                 in.close();
 297
 298                                                 if (ableToOpen == 1) {
 299                                                         m->mothurOut("Unable to open " + namefileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();  abort = true;
 300                                                         //erase from file list
 301                                                         namefileNames.erase(namefileNames.begin()+i);
 302                                                         i--;
 303                                                 }else {
 304                                                         m->setNameFile(namefileNames[i]);
 305                                                 }
 306                                         }
 307                                 }
 308                         }
 309
 310                         if (namefile != "") {
 311                                 if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); }
 312                         }
 313
 314                         groupfile = validParameter.validFile(parameters, "group", false);
 315                         if (groupfile == "not found") { groupfile = "";  }
 316                         else {
 317                                 m->splitAtDash(groupfile, groupfileNames);
 318
 319                                 //go through files and make sure they are good, if not, then disregard them
 320                                 for (int i = 0; i < groupfileNames.size(); i++) {
 321                                         if (inputDir != "") {
 322                                                 string path = m->hasPath(groupfileNames[i]);
 323                                                 //if the user has not given a path then, add inputdir. else leave path alone.
 324                                                 if (path == "") {       groupfileNames[i] = inputDir + groupfileNames[i];               }
 325                                         }
 326                                         int ableToOpen;
 327
 328                                         ifstream in;
 329                                         ableToOpen = m->openInputFile(groupfileNames[i], in, "noerror");
 330
 331                                         //if you can't open it, try default location
 332                                         if (ableToOpen == 1) {
 333                                                 if (m->getDefaultPath() != "") { //default path is set
 334                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(groupfileNames[i]);
 335                                                         m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
 336                                                         ifstream in2;
 337                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
 338                                                         in2.close();
 339                                                         groupfileNames[i] = tryPath;
 340                                                 }
 341                                         }
 342
 343                                         if (ableToOpen == 1) {
 344                                                 if (m->getOutputDir() != "") { //default path is set
 345                                                         string tryPath = m->getOutputDir() + m->getSimpleName(groupfileNames[i]);
 346                                                         m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
 347                                                         ifstream in2;
 348                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
 349                                                         in2.close();
 350                                                         groupfileNames[i] = tryPath;
 351                                                 }
 352                                         }
 353
 354                                         in.close();
 355
 356                                         if (ableToOpen == 1) {
 357                                                 m->mothurOut("Unable to open " + groupfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); groupfileNames[i] = "";
 358                                                 //erase from file list
 359                                                 groupfileNames.erase(groupfileNames.begin()+i);
 360                                                 i--;
 361                                         }else {
 362                                                 m->setGroupFile(groupfileNames[i]);
 363                                         }
 364                                 }
 365                         }
 366
 367                         if (groupfile != "") {
 368                                 if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); }
 369                         }else {
 370                                 for (int i = 0; i < fastaFileNames.size(); i++) {  groupfileNames.push_back("");  }
 371                         }
 372
 373                         //check for optional parameter and set defaults
 374                         // ...at some point should added some additional type checking...
 375                         string temp;
 376                         temp = validParameter.validFile(parameters, "ksize", false);            if (temp == "not found"){       temp = "8";                             }
 377                         convert(temp, kmerSize);
 378
 379                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
 380                         save = m->isTrue(temp);
 381                         //this is so the threads can quickly load the reference data
 382                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 383                         #else
 384                         if (processors != 1) { save = true; }
 385                         #endif
 386                         rdb->save = save;
 387                         if (save) { //clear out old references
 388                                 rdb->clearMemory();
 389                         }
 390
 391                         //this has to go after save so that if the user sets save=t and provides no reference we abort
 392                         templateFileName = validParameter.validFile(parameters, "reference", true);
 393                         if (templateFileName == "not found") {
 394                                 //check for saved reference sequences
 395                                 if (rdb->referenceSeqs.size() != 0) {
 396                                         templateFileName = "saved";
 397                                 }else {
 398                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the classify.seqs command.");
 399                                         m->mothurOutEndLine();
 400                                         abort = true;
 401                                 }
 402                         }else if (templateFileName == "not open") { abort = true; }
 403                         else {  if (save) {     rdb->setSavedReference(templateFileName);       }       }
 404
 405                         //this has to go after save so that if the user sets save=t and provides no reference we abort
 406                         taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
 407                         if (taxonomyFileName == "not found") {
 408                                 //check for saved reference sequences
 409                                 if (rdb->wordGenusProb.size() != 0) {
 410                                         taxonomyFileName = "saved";
 411                                 }else {
 412                                         m->mothurOut("[ERROR]: You don't have any saved taxonomy information and the taxonomy parameter is a required for the classify.seqs command.");
 413                                         m->mothurOutEndLine();
 414                                         abort = true;
 415                                 }
 416                         }else if (taxonomyFileName == "not open") { abort = true; }
 417                         else {  if (save) {     rdb->setSavedTaxonomy(taxonomyFileName);        }       }
 418
 419                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
 420                         m->setProcessors(temp);
 421                         convert(temp, processors);
 422
 423                         search = validParameter.validFile(parameters, "search", false);         if (search == "not found"){     search = "kmer";                }
 424
 425                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "bayesian";    }
 426
 427                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
 428                         convert(temp, match);
 429
 430                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
 431                         convert(temp, misMatch);
 432
 433                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
 434                         convert(temp, gapOpen);
 435
 436                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
 437                         convert(temp, gapExtend);
 438
 439                         temp = validParameter.validFile(parameters, "numwanted", false);        if (temp == "not found"){       temp = "10";                    }
 440                         convert(temp, numWanted);
 441
 442                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found"){       temp = "0";                             }
 443                         convert(temp, cutoff);
 444
 445                         temp = validParameter.validFile(parameters, "probs", false);            if (temp == "not found"){       temp = "true";                  }
 446                         probs = m->isTrue(temp);
 447
 448                         temp = validParameter.validFile(parameters, "iters", false);            if (temp == "not found") { temp = "100";                        }
 449                         convert(temp, iters);
 450
 451
 452
 453                         if ((method == "bayesian") && (search != "kmer"))  {
 454                                 m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
 455                                 search = "kmer";
 456                         }
 457                 }
 458
 459         }
 460         catch(exception& e) {
 461                 m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
 462                 exit(1);
 463         }
 464 }
 465
 466 //**********************************************************************************************************************
 467 ClassifySeqsCommand::~ClassifySeqsCommand(){
 468         if (abort == false) {
 469                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
 470         }
 471 }
 472 //**********************************************************************************************************************
 473
 474 int ClassifySeqsCommand::execute(){
 475         try {
 476                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 477
 478                 if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());           }
 479                 else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
 480                 else {
 481                         m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
 482                         m->mothurOutEndLine();
 483                         classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());
 484                 }
 485
 486                 if (m->control_pressed) { delete classify; return 0; }
 487
 488                 for (int s = 0; s < fastaFileNames.size(); s++) {
 489
 490                         m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
 491
 492                         string baseTName = taxonomyFileName;
 493                         if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy();  }
 494
 495                         string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
 496                         RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
 497                         if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
 498                         RippedTaxName +=  ".";
 499
 500                         if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
 501                         string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
 502                         string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
 503                         string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
 504
 505                         if ((method == "knn") && (search == "distance")) {
 506                                 string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "match.dist";
 507                                 classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
 508                         }
 509
 510                         outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
 511                         outputNames.push_back(taxSummary);      outputTypes["taxsummary"].push_back(taxSummary);
 512
 513                         int start = time(NULL);
 514                         int numFastaSeqs = 0;
 515                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
 516
 517 #ifdef USE_MPI
 518                                 int pid, numSeqsPerProcessor;
 519                                 int tag = 2001;
 520                                 vector<unsigned long long> MPIPos;
 521
 522                                 MPI_Status status;
 523                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 524                                 MPI_Comm_size(MPI_COMM_WORLD, &processors);
 525
 526                                 MPI_File inMPI;
 527                                 MPI_File outMPINewTax;
 528                                 MPI_File outMPITempTax;
 529
 530                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
 531                                 int inMode=MPI_MODE_RDONLY;
 532
 533                                 char outNewTax[1024];
 534                                 strcpy(outNewTax, newTaxonomyFile.c_str());
 535
 536                                 char outTempTax[1024];
 537                                 strcpy(outTempTax, tempTaxonomyFile.c_str());
 538
 539                                 char inFileName[1024];
 540                                 strcpy(inFileName, fastaFileNames[s].c_str());
 541
 542                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
 543                                 MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax);
 544                                 MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax);
 545
 546                                 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify;  return 0;  }
 547
 548                                 if (pid == 0) { //you are the root process
 549
 550                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
 551
 552                                         //send file positions to all processes
 553                                         for(int i = 1; i < processors; i++) {
 554                                                 MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 555                                                 MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
 556                                         }
 557
 558                                         //figure out how many sequences you have to align
 559                                         numSeqsPerProcessor = numFastaSeqs / processors;
 560                                         int startIndex =  pid * numSeqsPerProcessor;
 561                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
 562
 563
 564                                         //align your part
 565                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
 566
 567                                         if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
 568
 569                                         for (int i = 1; i < processors; i++) {
 570                                                 int done;
 571                                                 MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 572                                         }
 573                                 }else{ //you are a child process
 574                                         MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 575                                         MPIPos.resize(numFastaSeqs+1);
 576                                         MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
 577
 578                                         //figure out how many sequences you have to align
 579                                         numSeqsPerProcessor = numFastaSeqs / processors;
 580                                         int startIndex =  pid * numSeqsPerProcessor;
 581                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
 582
 583
 584                                         //align your part
 585                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
 586
 587                                         if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
 588
 589                                         int done = 0;
 590                                         MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 591                                 }
 592
 593                                 //close files
 594                                 MPI_File_close(&inMPI);
 595                                 MPI_File_close(&outMPINewTax);
 596                                 MPI_File_close(&outMPITempTax);
 597                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 598
 599 #else
 600
 601                         vector<unsigned long long> positions;
 602 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 603                         positions = m->divideFile(fastaFileNames[s], processors);
 604                         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
 605 #else
 606                         if (processors == 1) {
 607                                 lines.push_back(new linePair(0, 1000));
 608                         }else {
 609                                 positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs);
 610
 611                                 //figure out how many sequences you have to process
 612                                 int numSeqsPerProcessor = numFastaSeqs / processors;
 613                                 for (int i = 0; i < processors; i++) {
 614                                         int startIndex =  i * numSeqsPerProcessor;
 615                                         if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
 616                                         lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
 617                                 }
 618                         }
 619 #endif
 620                         if(processors == 1){
 621                                 numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
 622                         }else{
 623                                 numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
 624                         }
 625 #endif
 626
 627                 m->mothurOutEndLine();
 628                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
 629                 start = time(NULL);
 630
 631
 632                 #ifdef USE_MPI
 633                         if (pid == 0) {  //this part does not need to be paralellized
 634
 635                                 if(namefile != "") { m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();  MPIReadNamesFile(namefileNames[s]);  m->mothurOut("  Done."); m->mothurOutEndLine(); }
 636                 #else
 637                         //read namefile
 638                         if(namefile != "") {
 639
 640                             m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
 641
 642                                 nameMap.clear(); //remove old names
 643
 644                                 ifstream inNames;
 645                                 m->openInputFile(namefileNames[s], inNames);
 646
 647                                 string firstCol, secondCol;
 648                                 while(!inNames.eof()) {
 649                                         inNames >> firstCol >> secondCol; m->gobble(inNames);
 650
 651                                         vector<string> temp;
 652                                         m->splitAtComma(secondCol, temp);
 653
 654                                         nameMap[firstCol] = temp;
 655                                 }
 656                                 inNames.close();
 657
 658                                 m->mothurOut("  Done."); m->mothurOutEndLine();
 659                         }
 660                 #endif
 661
 662                         string group = "";
 663                         if (groupfile != "") {  group = groupfileNames[s]; }
 664
 665                         PhyloSummary taxaSum(baseTName, group);
 666
 667                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
 668
 669                         if (namefile == "") {  taxaSum.summarize(tempTaxonomyFile);  }
 670                         else {
 671                                 ifstream in;
 672                                 m->openInputFile(tempTaxonomyFile, in);
 673
 674                                 //read in users taxonomy file and add sequences to tree
 675                                 string name, taxon;
 676
 677                                 while(!in.eof()){
 678                                         in >> name >> taxon; m->gobble(in);
 679
 680                                         itNames = nameMap.find(name);
 681
 682                                         if (itNames == nameMap.end()) {
 683                                                 m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
 684                                         }else{
 685                                                 for (int i = 0; i < itNames->second.size(); i++) {
 686                                                         taxaSum.addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
 687                                                 }
 688                                                 itNames->second.clear();
 689                                                 nameMap.erase(itNames->first);
 690                                         }
 691                                 }
 692                                 in.close();
 693                         }
 694                         m->mothurRemove(tempTaxonomyFile);
 695
 696                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
 697
 698                         //print summary file
 699                         ofstream outTaxTree;
 700                         m->openOutputFile(taxSummary, outTaxTree);
 701                         taxaSum.print(outTaxTree);
 702                         outTaxTree.close();
 703
 704                         //output taxonomy with the unclassified bins added
 705                         ifstream inTax;
 706                         m->openInputFile(newTaxonomyFile, inTax);
 707
 708                         ofstream outTax;
 709                         string unclass = newTaxonomyFile + ".unclass.temp";
 710                         m->openOutputFile(unclass, outTax);
 711
 712                         //get maxLevel from phylotree so you know how many 'unclassified's to add
 713                         int maxLevel = taxaSum.getMaxLevel();
 714
 715                         //read taxfile - this reading and rewriting is done to preserve the confidence scores.
 716                         string name, taxon;
 717                         while (!inTax.eof()) {
 718                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } m->mothurRemove(unclass); delete classify; return 0; }
 719
 720                                 inTax >> name >> taxon; m->gobble(inTax);
 721
 722                                 string newTax = addUnclassifieds(taxon, maxLevel);
 723
 724                                 outTax << name << '\t' << newTax << endl;
 725                         }
 726                         inTax.close();
 727                         outTax.close();
 728
 729                         m->mothurRemove(newTaxonomyFile);
 730                         rename(unclass.c_str(), newTaxonomyFile.c_str());
 731
 732                         m->mothurOutEndLine();
 733                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
 734
 735                         #ifdef USE_MPI
 736                                 }
 737                         #endif
 738
 739                         m->mothurOutEndLine();
 740                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 741                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 742                         m->mothurOutEndLine();
 743                 }
 744
 745                 //set taxonomy file as new current taxonomyfile
 746                 string current = "";
 747                 itTypes = outputTypes.find("taxonomy");
 748                 if (itTypes != outputTypes.end()) {
 749                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
 750                 }
 751
 752                 delete classify;
 753
 754                 return 0;
 755         }
 756         catch(exception& e) {
 757                 m->errorOut(e, "ClassifySeqsCommand", "execute");
 758                 exit(1);
 759         }
 760 }
 761
 762 /**************************************************************************************************/
 763 string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) {
 764         try{
 765                 string newTax, taxon;
 766                 int level = 0;
 767
 768                 //keep what you have counting the levels
 769                 while (tax.find_first_of(';') != -1) {
 770                         //get taxon
 771                         taxon = tax.substr(0,tax.find_first_of(';'))+';';
 772                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
 773                         newTax += taxon;
 774                         level++;
 775                 }
 776
 777                 //add "unclassified" until you reach maxLevel
 778                 while (level < maxlevel) {
 779                         newTax += "unclassified;";
 780                         level++;
 781                 }
 782
 783                 return newTax;
 784         }
 785         catch(exception& e) {
 786                 m->errorOut(e, "ClassifySeqsCommand", "addUnclassifieds");
 787                 exit(1);
 788         }
 789 }
 790
 791 /**************************************************************************************************/
 792
 793 int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
 794         try {
 795
 796                 int num = 0;
 797                 processIDS.clear();
 798
 799 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 800                 int process = 1;
 801
 802                 //loop through and create all the processes you want
 803                 while (process != processors) {
 804                         int pid = fork();
 805
 806                         if (pid > 0) {
 807                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 808                                 process++;
 809                         }else if (pid == 0){
 810                                 num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
 811
 812                                 //pass numSeqs to parent
 813                                 ofstream out;
 814                                 string tempFile = filename + toString(getpid()) + ".num.temp";
 815                                 m->openOutputFile(tempFile, out);
 816                                 out << num << endl;
 817                                 out.close();
 818
 819                                 exit(0);
 820                         }else {
 821                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 822                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 823                                 exit(0);
 824                         }
 825                 }
 826
 827                 //parent does its part
 828                 num = driver(lines[0], taxFileName, tempTaxFile, filename);
 829
 830                 //force parent to wait until all the processes are done
 831                 for (int i=0;i<processIDS.size();i++) {
 832                         int temp = processIDS[i];
 833                         wait(&temp);
 834                 }
 835
 836                 for (int i = 0; i < processIDS.size(); i++) {
 837                         ifstream in;
 838                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
 839                         m->openInputFile(tempFile, in);
 840                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
 841                         in.close(); m->mothurRemove(m->getFullPathName(tempFile));
 842                 }
 843 #else
 844                 //////////////////////////////////////////////////////////////////////////////////////////////////////
 845                 //Windows version shared memory, so be careful when passing variables through the alignData struct.
 846                 //Above fork() will clone, so memory is separate, but that's not the case with windows,
 847                 //////////////////////////////////////////////////////////////////////////////////////////////////////
 848
 849                 vector<classifyData*> pDataArray;
 850                 DWORD   dwThreadIdArray[processors-1];
 851                 HANDLE  hThreadArray[processors-1];
 852
 853                 //Create processor worker threads.
 854                 for( int i=0; i<processors-1; i++ ){
 855                         // Allocate memory for thread data.
 856                         string extension = "";
 857                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
 858
 859                         classifyData* tempclass = new classifyData(probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i);
 860                         pDataArray.push_back(tempclass);
 861
 862                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
 863                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
 864                         hThreadArray[i] = CreateThread(NULL, 0, MyClassThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
 865
 866                 }
 867
 868                 //parent does its part
 869                 num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", filename);
 870                 processIDS.push_back((processors-1));
 871
 872                 //Wait until all threads have terminated.
 873                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
 874
 875                 //Close all thread handles and free memory allocations.
 876                 for(int i=0; i < pDataArray.size(); i++){
 877                         num += pDataArray[i]->count;
 878                         CloseHandle(hThreadArray[i]);
 879                         delete pDataArray[i];
 880                 }
 881
 882         #endif
 883
 884                 for(int i=0;i<processIDS.size();i++){
 885                         appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
 886                         appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
 887                         m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
 888                         m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
 889                 }
 890
 891                 return num;
 892
 893         }
 894         catch(exception& e) {
 895                 m->errorOut(e, "ClassifySeqsCommand", "createProcesses");
 896                 exit(1);
 897         }
 898 }
 899 /**************************************************************************************************/
 900
 901 void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
 902         try{
 903
 904                 ofstream output;
 905                 ifstream input;
 906                 m->openOutputFileAppend(filename, output);
 907                 m->openInputFile(temp, input);
 908
 909                 while(char c = input.get()){
 910                         if(input.eof())         {       break;                  }
 911                         else                            {       output << c;    }
 912                 }
 913
 914                 input.close();
 915                 output.close();
 916         }
 917         catch(exception& e) {
 918                 m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
 919                 exit(1);
 920         }
 921 }
 922
 923 //**********************************************************************************************************************
 924
 925 int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
 926         try {
 927                 ofstream outTax;
 928                 m->openOutputFile(taxFName, outTax);
 929
 930                 ofstream outTaxSimple;
 931                 m->openOutputFile(tempTFName, outTaxSimple);
 932
 933                 ifstream inFASTA;
 934                 m->openInputFile(filename, inFASTA);
 935
 936                 string taxonomy;
 937
 938                 inFASTA.seekg(filePos->start);
 939
 940                 bool done = false;
 941                 int count = 0;
 942
 943                 while (!done) {
 944                         if (m->control_pressed) { return 0; }
 945
 946                         Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
 947
 948                         if (candidateSeq->getName() != "") {
 949
 950                                 taxonomy = classify->getTaxonomy(candidateSeq);
 951
 952                                 if (m->control_pressed) { delete candidateSeq; return 0; }
 953
 954                                 if (taxonomy != "bad seq") {
 955                                         //output confidence scores or not
 956                                         if (probs) {
 957                                                 outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
 958                                         }else{
 959                                                 outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
 960                                         }
 961
 962                                         outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
 963                                 }
 964                                 count++;
 965                         }
 966                         delete candidateSeq;
 967
 968                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 969                                 unsigned long long pos = inFASTA.tellg();
 970                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
 971                         #else
 972                                 if (inFASTA.eof()) { break; }
 973                         #endif
 974
 975                         //report progress
 976                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
 977
 978                 }
 979                 //report progress
 980                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
 981
 982                 inFASTA.close();
 983                 outTax.close();
 984                 outTaxSimple.close();
 985
 986                 return count;
 987         }
 988         catch(exception& e) {
 989                 m->errorOut(e, "ClassifySeqsCommand", "driver");
 990                 exit(1);
 991         }
 992 }
 993 //**********************************************************************************************************************
 994 #ifdef USE_MPI
 995 int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector<unsigned long long>& MPIPos){
 996         try {
 997                 MPI_Status statusNew;
 998                 MPI_Status statusTemp;
 999                 MPI_Status status;
1000
1001                 int pid;
1002                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1003
1004                 string taxonomy;
1005                 string outputString;
1006
1007                 for(int i=0;i<num;i++){
1008
1009                         if (m->control_pressed) { return 0; }
1010
1011                         //read next sequence
1012                         int length = MPIPos[start+i+1] - MPIPos[start+i];
1013                         char* buf4 = new char[length];
1014                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
1015
1016                         string tempBuf = buf4;
1017                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
1018                         istringstream iss (tempBuf,istringstream::in);
1019                         delete buf4;
1020
1021                         Sequence* candidateSeq = new Sequence(iss);
1022
1023                         if (candidateSeq->getName() != "") {
1024                                 taxonomy = classify->getTaxonomy(candidateSeq);
1025
1026                                 if (taxonomy != "bad seq") {
1027                                         //output confidence scores or not
1028                                         if (probs) {
1029                                                 outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
1030                                         }else{
1031                                                 outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
1032                                         }
1033
1034                                         int length = outputString.length();
1035                                         char* buf2 = new char[length];
1036                                         memcpy(buf2, outputString.c_str(), length);
1037
1038                                         MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
1039                                         delete buf2;
1040
1041                                         outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
1042                                         length = outputString.length();
1043                                         char* buf = new char[length];
1044                                         memcpy(buf, outputString.c_str(), length);
1045
1046                                         MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
1047                                         delete buf;
1048                                 }
1049                         }
1050                         delete candidateSeq;
1051
1052                         if((i+1) % 100 == 0){   cout << "Classifying sequence " << (i+1) << endl;       }
1053                 }
1054
1055                 if(num % 100 != 0){     cout << "Classifying sequence " << (num) << endl;       }
1056
1057
1058                 return 1;
1059         }
1060         catch(exception& e) {
1061                 m->errorOut(e, "ClassifySeqsCommand", "driverMPI");
1062                 exit(1);
1063         }
1064 }
1065
1066 //**********************************************************************************************************************
1067 int ClassifySeqsCommand::MPIReadNamesFile(string nameFilename){
1068         try {
1069
1070                 nameMap.clear(); //remove old names
1071
1072                 MPI_File inMPI;
1073                 MPI_Offset size;
1074                 MPI_Status status;
1075
1076                 //char* inFileName = new char[nameFilename.length()];
1077                 //memcpy(inFileName, nameFilename.c_str(), nameFilename.length());
1078
1079                 char inFileName[1024];
1080                 strcpy(inFileName, nameFilename.c_str());
1081
1082                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
1083                 MPI_File_get_size(inMPI, &size);
1084                 //delete inFileName;
1085
1086                 char* buffer = new char[size];
1087                 MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
1088
1089                 string tempBuf = buffer;
1090                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
1091                 istringstream iss (tempBuf,istringstream::in);
1092                 delete buffer;
1093
1094                 string firstCol, secondCol;
1095                 while(!iss.eof()) {
1096                         iss >> firstCol >> secondCol; m->gobble(iss);
1097
1098                         vector<string> temp;
1099                         m->splitAtComma(secondCol, temp);
1100
1101                         nameMap[firstCol] = temp;
1102                 }
1103
1104                 MPI_File_close(&inMPI);
1105
1106                 return 1;
1107         }
1108         catch(exception& e) {
1109                 m->errorOut(e, "ClassifySeqsCommand", "MPIReadNamesFile");
1110                 exit(1);
1111         }
1112 }
1113 #endif
1114 /**************************************************************************************************/