]> git.donarmstrong.com Git - mothur.git/commitdiff
finished added bygroup processing of chimeras in chimera.slayer and chimera.uchime...
authorwestcott <westcott>
Wed, 21 Sep 2011 18:23:15 +0000 (18:23 +0000)
committerwestcott <westcott>
Wed, 21 Sep 2011 18:23:15 +0000 (18:23 +0000)
chimeraslayercommand.cpp
chimeraslayercommand.h
chimerauchimecommand.cpp
mothurout.cpp
mothurout.h
qualityscores.cpp
seqsummarycommand.cpp

index 0958630862ebf3e61c9d2ec23e5618ddaa868ef1..629eca965b3ca1c084c3d837b6a2870ea8fa43ec 100644 (file)
@@ -10,6 +10,7 @@
 #include "chimeraslayercommand.h"
 #include "deconvolutecommand.h"
 #include "referencedb.h"
+#include "sequenceparser.h"
 
 //**********************************************************************************************************************
 vector<string> ChimeraSlayerCommand::setParameters(){  
@@ -17,6 +18,7 @@ vector<string> ChimeraSlayerCommand::setParameters(){
                CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pwindow("window", "Number", "", "50", "", "", "",false,false); parameters.push_back(pwindow);
                CommandParameter pksize("ksize", "Number", "", "7", "", "", "",false,false); parameters.push_back(pksize);
                CommandParameter pmatch("match", "Number", "", "5.0", "", "", "",false,false); parameters.push_back(pmatch);
@@ -58,6 +60,7 @@ string ChimeraSlayerCommand::getHelpString(){
                helpString += "The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
+               helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
@@ -298,6 +301,83 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        
                        if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                        
+                       bool hasGroup = true;
+                       groupfile = validParameter.validFile(parameters, "group", false);
+                       if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
+                       else { 
+                               m->splitAtDash(groupfile, groupFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < groupFileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (groupFileNames[i] == "current") { 
+                                               groupFileNames[i] = m->getGroupFile(); 
+                                               if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       groupFileNames.erase(groupFileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(groupFileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               groupFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               groupFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       groupFileNames.erase(groupFileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setGroupFile(groupFileNames[i]);
+                                               }
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       
+                       if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+                       
+                       
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                        
@@ -450,6 +530,8 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        if ((search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
                        
                        if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+                       if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+
                }
        }
        catch(exception& e) {
@@ -469,38 +551,59 @@ int ChimeraSlayerCommand::execute(){
                
                        int start = time(NULL); 
                        
-                       if (templatefile == "self") { 
+                       //you provided a groupfile
+                       string groupFile = "";
+                       if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
+                       
+                       //maps a filename to priority map. 
+                       //if no groupfile this is fastafileNames[s] -> prioirity
+                       //if groupfile then this is each groups seqs -> priority
+                       map<string, map<string, int> > fileToPriority; 
+                       map<string, map<string, int> >::iterator itFile;
+                       map<string, string> fileGroup;
+                       map<string, int> priority;
+                       fileToPriority[fastaFileNames[s]] = priority; //default
+                       fileGroup[fastaFileNames[s]] = "noGroup";
+                       SequenceParser* parser = NULL;
+                       int totalSeqs = 0;
+                       int totalChimeras = 0;
+                       
+                       if ((templatefile == "self") && (groupFile == "")) { 
+                               fileGroup.clear();
+                               fileToPriority.clear();
                                if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
                                string nameFile = "";
                                if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
                                        nameFile = nameFileNames[s];
-                               }else {
-                                       m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
-                                       
-                                       //use unique.seqs to create new name and fastafile
-                                       string inputString = "fasta=" + fastaFileNames[s];
-                                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
-                                       m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-                                       
-                                       Command* uniqueCommand = new DeconvoluteCommand(inputString);
-                                       uniqueCommand->execute();
-                                       
-                                       map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
-                                       
-                                       delete uniqueCommand;
-                                       
-                                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
-                                       
-                                       nameFile = filenames["name"][0];
-                                       fastaFileNames[s] = filenames["fasta"][0];
-                               }
+                               }else {  nameFile = getNamesFile(fastaFileNames[s]); }
                                
                                //sort fastafile by abundance, returns new sorted fastafile name
                                m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); 
                                priority = sortFastaFile(fastaFileNames[s], nameFile);
                                m->mothurOut("Done."); m->mothurOutEndLine();
                                
+                               fileToPriority[fastaFileNames[s]] = priority;
                                if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
+                       }else if ((templatefile == "self") && (groupFile != "")) {
+                               fileGroup.clear();
+                               fileToPriority.clear();
+                               string nameFile = "";
+                               if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
+                                       nameFile = nameFileNames[s];
+                               }else { nameFile = getNamesFile(fastaFileNames[s]); }
+                               
+                               //Parse sequences by group
+                               parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+                               vector<string> groups = parser->getNamesOfGroups();
+                               
+                               for (int i = 0; i < groups.size(); i++) {
+                                       vector<Sequence> thisGroupsSeqs = parser->getSeqs(groups[i]);
+                                       map<string, string> thisGroupsMap = parser->getNameMap(groups[i]);
+                                       string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta";
+                                       priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); 
+                                       fileToPriority[newFastaFile] = priority;
+                                       fileGroup[newFastaFile] = groups[i];
+                               }
                        }
                        
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
@@ -508,225 +611,93 @@ int ChimeraSlayerCommand::execute(){
                        string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "slayer.accnos";
                        string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "slayer.fasta";
                        
-                       //create chimera here if you are mac or linux because fork will copy for you. Create in create processes instead if you are windows.
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI
-                       if (templatefile != "self") { //you want to run slayer with a reference template
-                               chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());      
-                       }else {
-                               chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());    
-                       }
+                       //clears files
+                       ofstream out, out1, out2;
+                       m->openOutputFile(outputFileName, out); out.close(); 
+                       m->openOutputFile(accnosFileName, out1); out1.close();
+                       if (trim) { m->openOutputFile(trimFastaFileName, out2); out2.close(); }
+                       outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
+                       outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
+                       if (trim) {  outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }
                        
-                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
                        
-                       if (chimera->getUnaligned()) { 
-                               m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
-                               delete chimera;
-                               return 0; 
-                       }
-                       templateSeqsLength = chimera->getLength();
-                       #else
-                       if (processors == 1) {
-                               if (templatefile != "self") { //you want to run slayer with a reference template
-                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());      
-                               }else {
-                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());    
-                               }
+                       for (itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) {
                                
-                               if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
+                               string thisFastaName = itFile->first;
+                               map<string, int> thisPriority = itFile->second;
                                
-                               if (chimera->getUnaligned()) { 
-                                       m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
-                                       delete chimera;
-                                       return 0; 
-                               }
-                               templateSeqsLength = chimera->getLength();
-                       }
-                       #endif
-                       
-               #ifdef USE_MPI  
-                       int pid, numSeqsPerProcessor; 
-                               int tag = 2001;
-                               vector<unsigned long long> MPIPos;
+                               //this is true when you have parsed by groups
+                               if (fileToPriority.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine();  }
                                
-                               MPI_Status status; 
-                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
-
-                               MPI_File inMPI;
-                               MPI_File outMPI;
-                               MPI_File outMPIAccnos;
-                               MPI_File outMPIFasta;
+                               string thisoutputFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera";
+                               string thisaccnosFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos";
+                               string thistrimFastaFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta";
                                
-                               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
-                               int inMode=MPI_MODE_RDONLY; 
+                               //create chimera here if you are mac or linux because fork will copy for you. Create in create processes instead if you are windows.
+                               if (processors == 1) { templateSeqsLength = setupChimera(thisFastaName, thisPriority); }
+                               else {
+                                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI
+                                               templateSeqsLength = setupChimera(thisFastaName, thisPriority);
+                                       #endif
+                               }
                                
-                               char outFilename[1024];
-                               strcpy(outFilename, outputFileName.c_str());
+                               if (m->control_pressed) {  if (parser != NULL) { delete parser; } for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
                                
-                               char outAccnosFilename[1024];
-                               strcpy(outAccnosFilename, accnosFileName.c_str());
-                       
-                               char outFastaFilename[1024];
-                               strcpy(outFastaFilename, trimFastaFileName.c_str());
-                               
-                               char inFileName[1024];
-                               strcpy(inFileName, fastaFileNames[s].c_str());
-
-                               MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                               MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
-                               MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
-                               if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
-
-                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }   delete chimera; return 0;  }
-                       
-                               if (pid == 0) { //you are the root process 
-                                       m->mothurOutEndLine();
-                                       m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
-                                       m->mothurOutEndLine();
-               
-                                       string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
-                                       
-                                       //print header
-                                       int length = outTemp.length();
-                                       char* buf2 = new char[length];
-                                       memcpy(buf2, outTemp.c_str(), length);
-
-                                       MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
-                                       delete buf2;
-
-                                       MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
+                       #ifdef USE_MPI  
+                               MPIExecute(thisFastaName, thisoutputFileName, thisaccnosFileName, thistrimFastaFileName);
+                               if (m->control_pressed) { outputTypes.clear();  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0;  }
+                       #else
+                                       //break up file
+                                       vector<unsigned long long> positions; 
+                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                                       positions = m->divideFile(thisFastaName, processors);
+                                       for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
+                               #else
+                                       if (processors == 1) {  lines.push_back(new linePair(0, 1000)); }
+                                       else {
+                                               positions = m->setFilePosFasta(thisFastaName, numSeqs); 
                                        
-                                       if (templatefile != "self") { //if template=self we can only use 1 processor
-                                               //send file positions to all processes
-                                               for(int i = 1; i < processors; i++) { 
-                                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
-                                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                               //figure out how many sequences you have to process
+                                               int numSeqsPerProcessor = numSeqs / processors;
+                                               for (int i = 0; i < processors; i++) {
+                                                       int startIndex =  i * numSeqsPerProcessor;
+                                                       if(i == (processors - 1)){      numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;        }
+                                                       lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
                                                }
                                        }
-                                       //figure out how many sequences you have to align
-                                       numSeqsPerProcessor = numSeqs / processors;
-                                       int startIndex =  pid * numSeqsPerProcessor;
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                                       
-                                       if (templatefile == "self") { //if template=self we can only use 1 processor
-                                               startIndex = 0;
-                                               numSeqsPerProcessor = numSeqs;
-                                       }
-                                       
-                                       //do your part
-                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
+                               #endif
+                               
+                               if(processors == 1){
+                                       numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName);
                                        
                                        int numNoParents = chimera->getNumNoParents();
-                                       int temp;
-                                       for(int i = 1; i < processors; i++) { 
-                                               MPI_Recv(&temp, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
-                                               numNoParents += temp;
-                                       }
-                                       
-                                       
-                                       if (numSeqs == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
-                                       
-                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  delete chimera; return 0;  }
-
-                               }else{ //you are a child process
-                                       if (templatefile != "self") { //if template=self we can only use 1 processor
-                                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                               MPIPos.resize(numSeqs+1);
-                                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-                                       
-                                               //figure out how many sequences you have to align
-                                               numSeqsPerProcessor = numSeqs / processors;
-                                               int startIndex =  pid * numSeqsPerProcessor;
-                                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                                       if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
                                        
-                                               //do your part
-                                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
-                                               
-                                               int numNoParents = chimera->getNumNoParents();
-                                               MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
-                                       
-                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
-                               
-                                       }
-                               }
+                               }else{ numSeqs = createProcesses(thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName); }
                                
-                               //close files 
-                               MPI_File_close(&inMPI);
-                               MPI_File_close(&outMPI);
-                               MPI_File_close(&outMPIAccnos); 
-                               if (trim) { MPI_File_close(&outMPIFasta); }
-                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+                               if (m->control_pressed) { if (parser != NULL) { delete parser; }  outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                
-               #else
-                       //break up file
-                       vector<unsigned long long> positions; 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       positions = m->divideFile(fastaFileNames[s], processors);
-                       for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
-#else
-                       if (processors == 1) {
-                               lines.push_back(new linePair(0, 1000));
-                       }else {
-                               positions = m->setFilePosFasta(fastaFileNames[s], numSeqs); 
-                               
-                               //figure out how many sequences you have to process
-                               int numSeqsPerProcessor = numSeqs / processors;
-                               for (int i = 0; i < processors; i++) {
-                                       int startIndex =  i * numSeqsPerProcessor;
-                                       if(i == (processors - 1)){      numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;        }
-                                       lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
-                               }
-                       }
-#endif
-                       
-                       if(processors == 1){
-                               numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
-                               
-                               int numNoParents = chimera->getNumNoParents();
-                               if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+                       #endif
                                
-                               if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI
+                               delete chimera;
+                       #endif
                                
-                       }else{
-                               processIDS.resize(0);
+                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                                
-                               numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); 
-                       
-                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
-                               rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
-                               if (trim) {  rename((trimFastaFileName + toString(processIDS[0]) + ".temp").c_str(), trimFastaFileName.c_str()); }
-                                       
-                               //append output files
-                               for(int i=1;i<processIDS.size();i++){
-                                       m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                       m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
-                                       
-                                       m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
-                                       m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
-                                       
-                                       if (trim) {
-                                               m->appendFiles((trimFastaFileName + toString(processIDS[i]) + ".temp"), trimFastaFileName);
-                                               m->mothurRemove((trimFastaFileName + toString(processIDS[i]) + ".temp"));
-                                       }
-                               }
+                               //append files
+                               m->appendFiles(thisoutputFileName, outputFileName); m->mothurRemove(thisoutputFileName); 
+                               totalChimeras = m->appendFiles(thisaccnosFileName, accnosFileName); m->mothurRemove(thisaccnosFileName);
+                               if (trim) { m->appendFiles(thistrimFastaFileName, trimFastaFileName); m->mothurRemove(thistrimFastaFileName); }
                                
-                               if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
+                               totalSeqs += numSeqs;
                        }
-
                        
-               #endif
+                       if (fileToPriority.size() > 1) { totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName); }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI
-                       delete chimera;
-                       #endif
-                       
-                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                       
-                       outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
-                       outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
-                       if (trim) {  outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }
+                       if (parser != NULL) { delete parser; } 
                        
-                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+                       m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences.");     m->mothurOutEndLine();
                }
                
                //set accnos file as new current accnosfile
@@ -757,6 +728,376 @@ int ChimeraSlayerCommand::execute(){
        }
 }
 //**********************************************************************************************************************
+int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, string accnosFileName, string trimFastaFileName){
+       try {
+               
+#ifdef USE_MPI 
+               int pid, numSeqsPerProcessor; 
+               int tag = 2001;
+               vector<unsigned long long> MPIPos;
+               
+               MPI_Status status; 
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+               
+               MPI_File inMPI;
+               MPI_File outMPI;
+               MPI_File outMPIAccnos;
+               MPI_File outMPIFasta;
+               
+               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+               int inMode=MPI_MODE_RDONLY; 
+               
+               char outFilename[1024];
+               strcpy(outFilename, outputFileName.c_str());
+               
+               char outAccnosFilename[1024];
+               strcpy(outAccnosFilename, accnosFileName.c_str());
+               
+               char outFastaFilename[1024];
+               strcpy(outFastaFilename, trimFastaFileName.c_str());
+               
+               char inFileName[1024];
+               strcpy(inFileName, inputFile.c_str());
+               
+               MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+               MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+               MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+               if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
+               
+               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+               
+               if (pid == 0) { //you are the root process 
+                       m->mothurOutEndLine();
+                       m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
+                       m->mothurOutEndLine();
+                       
+                       string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
+                       
+                       //print header
+                       int length = outTemp.length();
+                       char* buf2 = new char[length];
+                       memcpy(buf2, outTemp.c_str(), length);
+                       
+                       MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
+                       delete buf2;
+                       
+                       MPIPos = m->setFilePosFasta(inputFile, numSeqs); //fills MPIPos, returns numSeqs
+                       
+                       if (templatefile != "self") { //if template=self we can only use 1 processor
+                               //send file positions to all processes
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
+                       }
+                       //figure out how many sequences you have to align
+                       numSeqsPerProcessor = numSeqs / processors;
+                       int startIndex =  pid * numSeqsPerProcessor;
+                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                       
+                       if (templatefile == "self") { //if template=self we can only use 1 processor
+                               startIndex = 0;
+                               numSeqsPerProcessor = numSeqs;
+                       }
+                       
+                       //do your part
+                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
+                       
+                       int numNoParents = chimera->getNumNoParents();
+                       int temp;
+                       for(int i = 1; i < processors; i++) { 
+                               MPI_Recv(&temp, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
+                               numNoParents += temp;
+                       }
+                       
+                       
+                       if (numSeqs == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+                       
+                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+                       
+               }else{ //you are a child process
+                       if (templatefile != "self") { //if template=self we can only use 1 processor
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPIPos.resize(numSeqs+1);
+                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
+                               //do your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
+                               
+                               int numNoParents = chimera->getNumNoParents();
+                               MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+                               
+                       }
+               }
+               
+               //close files 
+               MPI_File_close(&inMPI);
+               MPI_File_close(&outMPI);
+               MPI_File_close(&outMPIAccnos); 
+               if (trim) { MPI_File_close(&outMPIFasta); }
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+               
+               
+#endif         
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "MPIExecute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ChimeraSlayerCommand::deconvoluteResults(SequenceParser* parser, string outputFileName, string accnosFileName, string trimFileName){
+       try {
+               map<string, string> uniqueNames = parser->getAllSeqsMap();
+               map<string, string>::iterator itUnique;
+               int total = 0;
+               
+               //edit accnos file
+               ifstream in2; 
+               m->openInputFile(accnosFileName, in2, "no error");
+               
+               ofstream out2;
+               m->openOutputFile(accnosFileName+".temp", out2);
+               
+               string name; name = "";
+               set<string> chimerasInFile;
+               set<string>::iterator itChimeras;
+               
+               while (!in2.eof()) {
+                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
+                       
+                       in2 >> name; m->gobble(in2);
+                       
+                       //find unique name
+                       itUnique = uniqueNames.find(name);
+                       
+                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                       else {
+                               itChimeras = chimerasInFile.find((itUnique->second));
+                               
+                               if (itChimeras == chimerasInFile.end()) {
+                                       out2 << itUnique->second << endl;
+                                       chimerasInFile.insert((itUnique->second));
+                                       total++;
+                               }
+                       }
+               }
+               in2.close();
+               out2.close();
+               
+               m->mothurRemove(accnosFileName);
+               rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
+               
+               
+               //edit chimera file
+               ifstream in; 
+               m->openInputFile(outputFileName, in);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+
+               string rest, parent1, parent2, line;
+               set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
+               set<string>::iterator itNames;
+               
+               //assumptions - in file each read will always look like...
+               /*
+                F11Fcsw_92754  no
+                F11Fcsw_63104  F11Fcsw_33372   F11Fcsw_37007   0.89441 80.4469 0.2     1.03727 93.2961 52.2    no      0-241   243-369 
+                */
+               
+               //get header line
+               if (!in.eof()) {
+                       line = m->getline(in); m->gobble(in);
+                       out << line << endl;
+               }
+               
+               //for the chimera file, we want to make sure if any group finds a sequence to be chimeric then all groups do, 
+               //so if this is a report that did not find it to be chimeric, but it appears in the accnos file, 
+               //then ignore this report and continue until we find the report that found it to be chimeric
+               
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
+                       
+                       in >> name;             m->gobble(in);
+                       in >> parent1;  m->gobble(in);
+                       
+                       if (name == "Name") { //name = "Name" because we append the header line each time we add results from the groups
+                               line = m->getline(in); m->gobble(in);
+                       }else {
+                               if (parent1 == "no") {
+                                       //find unique name
+                                       itUnique = uniqueNames.find(name);
+                                       
+                                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       else {
+                                               //is this sequence really not chimeric??
+                                               itChimeras = chimerasInFile.find(itUnique->second);
+                                               
+                                               if (itChimeras == chimerasInFile.end()) {
+                                                       itNames = namesInFile.find((itUnique->second));
+                                                       
+                                                       if (itNames == namesInFile.end()) {cout << itUnique->second << endl; out << itUnique->second << '\t' << "no" << endl; namesInFile.insert(itUnique->second); }
+                                               }
+                                       }
+                               }else { //read the rest of the line
+                                       double DivQLAQRB,PerIDQLAQRB,BootStrapA,DivQLBQRA,PerIDQLBQRA,BootStrapB;
+                                       string flag, range1, range2;
+                                       bool print = false;
+                                       in >> parent2 >> DivQLAQRB >> PerIDQLAQRB >> BootStrapA >> DivQLBQRA >> PerIDQLBQRA >> BootStrapB >> flag >> range1 >> range2;  m->gobble(in);
+                                       
+                                       //find unique name
+                                       itUnique = uniqueNames.find(name);
+                                       
+                                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       else {
+                                               name = itUnique->second;
+                                               //is this name already in the file
+                                               itNames = namesInFile.find((name));
+                                               
+                                               if (itNames == namesInFile.end()) { //no not in file
+                                                       if (flag == "no") { //are you really a no??
+                                                               //is this sequence really not chimeric??
+                                                               itChimeras = chimerasInFile.find(name);
+                                                               
+                                                               //then you really are a no so print, otherwise skip
+                                                               if (itChimeras == chimerasInFile.end()) { print = true; }
+                                                       }else{ print = true; }
+                                               }
+                                       }
+                                       
+                                       if (print) {
+                                               out << name << '\t';
+                                               cout << name<< endl;
+                                               namesInFile.insert(name);
+
+                                               //output parent1's name
+                                               itUnique = uniqueNames.find(parent1);
+                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else { out << itUnique->second << '\t'; }
+                                               
+                                               //output parent2's name
+                                               itUnique = uniqueNames.find(parent2);
+                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else { out << itUnique->second << '\t'; }
+                                               
+                                               out << DivQLAQRB << '\t' << PerIDQLAQRB << '\t' << BootStrapA << '\t' << DivQLBQRA << '\t' << PerIDQLBQRA << '\t' << BootStrapB << '\t' << flag << '\t' << range1 << '\t' << range2 << endl;
+                                       }
+                               }                               
+                       }
+               }
+               in.close();
+               out.close();
+               
+               m->mothurRemove(outputFileName);
+               rename((outputFileName+".temp").c_str(), outputFileName.c_str());
+               
+               //edit fasta file
+               if (trim) {
+                       ifstream in3; 
+                       m->openInputFile(trimFileName, in3);
+                       
+                       ofstream out3;
+                       m->openOutputFile(trimFileName+".temp", out3);
+                       
+                       namesInFile.clear();
+                       
+                       while (!in3.eof()) {
+                               if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); m->mothurRemove((trimFileName+".temp")); return 0; }
+                               
+                               Sequence seq(in3); m->gobble(in3);
+                               
+                               if (seq.getName() != "") {
+                                       //find unique name
+                                       itUnique = uniqueNames.find(seq.getName());
+                                       
+                                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ seq.getName() + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       else {
+                                               itNames = namesInFile.find((itUnique->second));
+                                               
+                                               if (itNames == namesInFile.end()) {
+                                                       seq.printSequence(out3);
+                                               }
+                                       }
+                               }
+                       }
+                       in3.close();
+                       out3.close();
+                       
+                       m->mothurRemove(trimFileName);
+                       rename((trimFileName+".temp").c_str(), trimFileName.c_str());
+               }
+               
+               return total;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "deconvoluteResults");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+int ChimeraSlayerCommand::setupChimera(string inputFile, map<string, int>& priority){
+       try {
+               if (templatefile != "self") { //you want to run slayer with a reference template
+                       chimera = new ChimeraSlayer(inputFile, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());      
+               }else {
+                       chimera = new ChimeraSlayer(inputFile, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());    
+               }
+               
+               if (m->control_pressed) { delete chimera; return 0; }
+               
+               if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+               
+               return (chimera->getLength());
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "setupChimera");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+string ChimeraSlayerCommand::getNamesFile(string& inputFile){
+       try {
+               string nameFile = "";
+               
+               m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
+               
+               //use unique.seqs to create new name and fastafile
+               string inputString = "fasta=" + inputFile;
+               m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+               m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
+               
+               Command* uniqueCommand = new DeconvoluteCommand(inputString);
+               uniqueCommand->execute();
+               
+               map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
+               
+               delete uniqueCommand;
+               
+               m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+               
+               nameFile = filenames["name"][0];
+               inputFile = filenames["fasta"][0];
+               
+               return nameFile;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "getNamesFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos, string fasta){
        try {
@@ -1007,6 +1348,7 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                int process = 0;
                int num = 0;
                int numNoParents = 0;
+               processIDS.clear();
                
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                //loop through and create all the processes you want
@@ -1081,6 +1423,26 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                }
 #endif 
                if (num == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+               
+               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+               rename((accnos + toString(processIDS[0]) + ".temp").c_str(), accnos.c_str());
+               if (trim) {  rename((fasta + toString(processIDS[0]) + ".temp").c_str(), fasta.c_str()); }
+               
+               //append output files
+               for(int i=1;i<processIDS.size();i++){
+                       m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                       m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
+                       
+                       m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+                       m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+                       
+                       if (trim) {
+                               m->appendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta);
+                               m->mothurRemove((fasta + toString(processIDS[i]) + ".temp"));
+                       }
+               }
+               
+               
                return num;
        }
        catch(exception& e) {
@@ -1179,6 +1541,56 @@ map<string, int> ChimeraSlayerCommand::sortFastaFile(string fastaFile, string na
                exit(1);
        }
 }
-
+/**************************************************************************************************/
+map<string, int> ChimeraSlayerCommand::sortFastaFile(vector<Sequence>& thisseqs, map<string, string>& nameMap, string newFile) {
+       try {
+               map<string, int> nameAbund;
+               vector<seqPriorityNode> nameVector;
+               
+               //read through fastafile and store info
+               map<string, string> seqs;
+                               
+               for (int i = 0; i < thisseqs.size(); i++) {
+                       
+                       if (m->control_pressed) { return nameAbund; }
+                       
+                       map<string, string>::iterator itNameMap = nameMap.find(thisseqs[i].getName());
+                       
+                       if (itNameMap == nameMap.end()){
+                               m->control_pressed = true;
+                               m->mothurOut("[ERROR]: " + thisseqs[i].getName() + " is in your fastafile, but is not in your namesfile, please correct."); m->mothurOutEndLine();
+                       }else {
+                               int num = m->getNumNames(itNameMap->second);
+                               
+                               seqPriorityNode temp(num, thisseqs[i].getAligned(), thisseqs[i].getName());
+                               nameVector.push_back(temp);
+                       }
+               }
+       
+               //sort by num represented
+               sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes);
+       
+               if (m->control_pressed) { return nameAbund; }
+               
+               if (thisseqs.size() != nameVector.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return nameAbund; }
+                               
+               ofstream out;
+               m->openOutputFile(newFile, out);
+               
+               //print new file in order of
+               for (int i = 0; i < nameVector.size(); i++) {
+                       out << ">" << nameVector[i].name << endl << nameVector[i].seq << endl;
+                       nameAbund[nameVector[i].name] = nameVector[i].numIdentical;
+               }
+               out.close();
+               
+               return nameAbund;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 
index b37d4c04ac397fb2ede7ba1d2e0501b9508e5b9d..9a37a5ba3941a11d79c0b00ee387a3c2db2b2628 100644 (file)
@@ -14,6 +14,7 @@
 #include "command.hpp"
 #include "chimera.h"
 #include "chimeraslayer.h"
+#include "sequenceparser.h"
 
 /***********************************************************/
 
@@ -43,19 +44,25 @@ private:
 
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
-       map<string, int> priority;
        
        int driver(linePair*, string, string, string, string);
        int createProcesses(string, string, string, string);
        int divideInHalf(Sequence, string&, string&);
        map<string, int> sortFastaFile(string, string);
+       map<string, int> sortFastaFile(vector<Sequence>&, map<string, string>&, string newFile);
+       string getNamesFile(string&);
+       int setupChimera(string, map<string, int>&);
+       int MPIExecute(string, string, string, string);
+       int deconvoluteResults(SequenceParser*, string, string, string);
+       
+
                
        #ifdef USE_MPI
        int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
        #endif
 
        bool abort, realign, trim, trimera, save;
-       string fastafile, templatefile, outputDir, search, namefile, blastlocation;
+       string fastafile, groupfile, templatefile, outputDir, search, namefile, blastlocation;
        int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
        float divR;
        Chimera* chimera;
@@ -63,6 +70,7 @@ private:
        vector<string> outputNames;
        vector<string> fastaFileNames;
        vector<string> nameFileNames;
+       vector<string> groupFileNames;
        
 };
 
index 73e7ace9860eeb6410517497b5e0495e9fdb1b96..c035b01d689657c233126b5748b303a04a997356 100644 (file)
@@ -591,6 +591,47 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp
                map<string, string>::iterator itUnique;
                int total = 0;
                
+               //edit accnos file
+               ifstream in2; 
+               m->openInputFile(accnosFileName, in2);
+               
+               ofstream out2;
+               m->openOutputFile(accnosFileName+".temp", out2);
+               
+               string name;
+               set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
+               set<string>::iterator itNames;
+               set<string> chimerasInFile;
+               set<string>::iterator itChimeras;
+
+               
+               while (!in2.eof()) {
+                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
+                       
+                       in2 >> name; m->gobble(in2);
+                       
+                       //find unique name
+                       itUnique = uniqueNames.find(name);
+                       
+                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                       else {
+                               itChimeras = chimerasInFile.find((itUnique->second));
+                               
+                               if (itChimeras == chimerasInFile.end()) {
+                                       out2 << itUnique->second << endl;
+                                       chimerasInFile.insert((itUnique->second));
+                                       total++;
+                               }
+                       }
+               }
+               in2.close();
+               out2.close();
+               
+               m->mothurRemove(accnosFileName);
+               rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
+               
+               
+               
                //edit chimera file
                ifstream in; 
                m->openInputFile(outputFileName, in);
@@ -599,12 +640,11 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp
                m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
                float temp1;
-               string name, rest, parent1, parent2;
-               set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
-               set<string>::iterator itNames;
-               
+               string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
+               name = "";
+               namesInFile.clear();    
                //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
-               /*
+               /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
                 0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
                 0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
                */
@@ -613,11 +653,13 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp
                        
                        if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
                        
+                       bool print = false;
                        in >> temp1;    m->gobble(in);
                        in >> name;             m->gobble(in);
                        in >> parent1;  m->gobble(in);
                        in >> parent2;  m->gobble(in);
-                       rest = m->getline(in); m->gobble(in);
+                       in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
+                       m->gobble(in);
                        
                        //parse name - name will look like U68590/ab=1/
                        string restOfName = "";
@@ -632,46 +674,54 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp
                        
                        if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
                        else {
-                               itNames = namesInFile.find((itUnique->second));
+                               name = itUnique->second;
+                               //is this name already in the file
+                               itNames = namesInFile.find((name));
                                
-                               if (itNames == namesInFile.end()) {
-                                       out << temp1 << '\t' << itUnique->second << restOfName << '\t';
-                                       namesInFile.insert((itUnique->second));
-                                       
-                                       //parse parent1 names
-                                       if (parent1 != "*") {
-                                               restOfName = "";
-                                               pos = parent1.find_first_of('/');
-                                               if (pos != string::npos) {
-                                                       restOfName = parent1.substr(pos);
-                                                       parent1 = parent1.substr(0, pos);
-                                               }
+                               if (itNames == namesInFile.end()) { //no not in file
+                                       if (flag == "N") { //are you really a no??
+                                               //is this sequence really not chimeric??
+                                               itChimeras = chimerasInFile.find(name);
                                                
-                                               itUnique = uniqueNames.find(parent1);
-                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
-                                               else {
-                                                       out << itUnique->second << restOfName << '\t';
-                                               }
-                                       }else { out << parent1 << '\t'; }
+                                               //then you really are a no so print, otherwise skip
+                                               if (itChimeras == chimerasInFile.end()) { print = true; }
+                                       }else{ print = true; }
+                               }
+                       }
+                       
+                       if (print) {
+                               out << temp1 << '\t' << name << restOfName << '\t';
+                               namesInFile.insert(name);
+                               
+                               //parse parent1 names
+                               if (parent1 != "*") {
+                                       restOfName = "";
+                                       pos = parent1.find_first_of('/');
+                                       if (pos != string::npos) {
+                                               restOfName = parent1.substr(pos);
+                                               parent1 = parent1.substr(0, pos);
+                                       }
                                        
-                                       //parse parent2 names
-                                       if (parent2 != "*") {
-                                               restOfName = "";
-                                               pos = parent2.find_first_of('/');
-                                               if (pos != string::npos) {
-                                                       restOfName = parent2.substr(pos);
-                                                       parent2 = parent2.substr(0, pos);
-                                               }
-                                               
-                                               itUnique = uniqueNames.find(parent2);
-                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
-                                               else {
-                                                       out << itUnique->second << restOfName << '\t';
-                                               }
-                                       }else { out << parent2 << '\t'; }
+                                       itUnique = uniqueNames.find(parent1);
+                                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       else {  out << itUnique->second << restOfName << '\t';  }
+                               }else { out << parent1 << '\t'; }
+                               
+                               //parse parent2 names
+                               if (parent2 != "*") {
+                                       restOfName = "";
+                                       pos = parent2.find_first_of('/');
+                                       if (pos != string::npos) {
+                                               restOfName = parent2.substr(pos);
+                                               parent2 = parent2.substr(0, pos);
+                                       }
                                        
-                                       out  << rest << endl;
-                               }
+                                       itUnique = uniqueNames.find(parent2);
+                                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       else {  out << itUnique->second << restOfName << '\t';  }
+                               }else { out << parent2 << '\t'; }
+                               
+                               out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
                        }
                }
                in.close();
@@ -680,41 +730,7 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp
                m->mothurRemove(outputFileName);
                rename((outputFileName+".temp").c_str(), outputFileName.c_str());
                
-               //edit accnos file
-               ifstream in2; 
-               m->openInputFile(accnosFileName, in2);
-               
-               ofstream out2;
-               m->openOutputFile(accnosFileName+".temp", out2);
-               
-               name = "";
-               namesInFile.clear();
-               
-               while (!in2.eof()) {
-                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
-                       
-                       in2 >> name; m->gobble(in2);
-                       
-                       //find unique name
-                       itUnique = uniqueNames.find(name);
-                       
-                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
-                       else {
-                               itNames = namesInFile.find((itUnique->second));
-       
-                               if (itNames == namesInFile.end()) {
-                                       out2 << itUnique->second << endl;
-                                       namesInFile.insert((itUnique->second));
-                                       total++;
-                               }
-                       }
-               }
-               in2.close();
-               out2.close();
-               
-               m->mothurRemove(accnosFileName);
-               rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
-               
+                               
                //edit anls file
                //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
                /*
index c80bff27e4ab69bf26d0fddaae534fe625f6df0b..45d1176f9ddafeee56b219529efed45ea3d19d02 100644 (file)
@@ -958,7 +958,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
 }
 
 /**************************************************************************************************/
-void MothurOut::appendFiles(string temp, string filename) {
+int MothurOut::appendFiles(string temp, string filename) {
        try{
                ofstream output;
                ifstream input;
@@ -968,15 +968,18 @@ void MothurOut::appendFiles(string temp, string filename) {
                int ableToOpen = openInputFile(temp, input, "no error");
                //int ableToOpen = openInputFile(temp, input);
                
+               int numLines = 0;
                if (ableToOpen == 0) { //you opened it
                        while(char c = input.get()){
                                if(input.eof())         {       break;                  }
-                               else                            {       output << c;    }
+                               else                            {       output << c;    if (c == '\n') {numLines++;} }
                        }
                        input.close();
                }
                
                output.close();
+               
+               return numLines;
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "appendFiles");
index 013aa541444227deab43ec80eda7948dfd402bb6..3019386a64d8947bccfcee23a9705e3b7e5d3898 100644 (file)
@@ -62,7 +62,7 @@ class MothurOut {
                vector<unsigned long long> setFilePosEachLine(string, int&);
                vector<unsigned long long> setFilePosFasta(string, int&);
                string sortFile(string, string);
-               void appendFiles(string, string);
+               int appendFiles(string, string);
                int renameFile(string, string); //oldname, newname
                string getFullPathName(string);
                string hasPath(string);
index 5b64b133a0c3df43cfb018c03d9867136661f100..566b44c6bad55402fae1374aab0212547befe7cc 100644 (file)
@@ -327,7 +327,7 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int
                return 1;
        }
        catch(exception& e) {
-               m->errorOut(e, "QualityScores", "flipQScores");
+               m->errorOut(e, "QualityScores", "stripQualWindowAverage");
                exit(1);
        }                                                       
        
index be75e4fac4ab60697b0dd84de6eeb275d231a5b3..eeba0417b08f90aeb19d0ecf3664010133ad88fe 100644 (file)
@@ -317,6 +317,19 @@ int SeqSummaryCommand::execute(){
                sort(ambigBases.begin(), ambigBases.end());
                sort(longHomoPolymer.begin(), longHomoPolymer.end());
                int size = startPosition.size();
+               
+               //find means
+               float meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
+               meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
+               for (int i = 0; i < size; i++) {
+                       meanStartPosition += startPosition[i];
+                       meanEndPosition += endPosition[i];
+                       meanSeqLength += seqLength[i];
+                       meanAmbigBases += ambigBases[i];
+                       meanLongHomoPolymer += longHomoPolymer[i];
+               }
+               //this is an int divide so the remainder is lost
+               meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size;
                                
                int ptile0_25   = int(size * 0.025);
                int ptile25             = int(size * 0.250);
@@ -340,6 +353,8 @@ int SeqSummaryCommand::execute(){
                m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75]) + "\t" + toString(ptile75+1)); m->mothurOutEndLine();
                m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5]) + "\t" + toString(ptile97_5+1)); m->mothurOutEndLine();
                m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
+               m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine();
+
                if (namefile == "") {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
                else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }