]> git.donarmstrong.com Git - mothur.git/commitdiff
adds group parameter to chimera.uchime so you can check for chimeras with template...
authorwestcott <westcott>
Wed, 14 Sep 2011 17:29:48 +0000 (17:29 +0000)
committerwestcott <westcott>
Wed, 14 Sep 2011 17:29:48 +0000 (17:29 +0000)
chimerauchimecommand.cpp
chimerauchimecommand.h
commandfactory.cpp
preclustercommand.cpp
sequenceparser.cpp
sequenceparser.h

index 4c1db20ac23105ad29652a3f1feedfc38237c3a6..73e7ace9860eeb6410517497b5e0495e9fdb1b96 100644 (file)
@@ -20,6 +20,7 @@ vector<string> ChimeraUchimeCommand::setParameters(){
                CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -61,6 +62,7 @@ string ChimeraUchimeCommand::getHelpString(){
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
+               helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
                helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
                helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
@@ -299,6 +301,83 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        
                        if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                        
+                       bool hasGroup = true;
+                       groupfile = validParameter.validFile(parameters, "group", false);
+                       if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
+                       else { 
+                               m->splitAtDash(groupfile, groupFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < groupFileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (groupFileNames[i] == "current") { 
+                                               groupFileNames[i] = m->getGroupFile(); 
+                                               if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       groupFileNames.erase(groupFileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(groupFileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               groupFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //output directory is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               groupFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       groupFileNames.erase(groupFileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setGroupFile(groupFileNames[i]);
+                                               }
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       
+                       if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+                       
+                       
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                        
@@ -373,6 +452,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        skipgaps2 = m->isTrue(temp); 
                        
                        if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+                       if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+
                }
        }
        catch(exception& e) {
@@ -394,109 +475,95 @@ int ChimeraUchimeCommand::execute(){
                        
                        int start = time(NULL); 
                        string nameFile = "";
-                       
-                       if (templatefile == "self") { //you want to run uchime with a reference template
-                               
-                               #ifdef USE_MPI  
-                                       int pid; 
-                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                                       if (pid == 0) { //you are the root process 
-                               #endif  
+                       if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
+                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
+                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
+                       string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
+                       string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                                
+                       //you provided a groupfile
+                       string groupFile = "";
+                       if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
+                       
+                       if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime using the input sequences themselves as the reference, without a group file
+
                                if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
                                if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
                                        nameFile = nameFileNames[s];
-                               }else {
-                                       m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
-                                       
-                                       //use unique.seqs to create new name and fastafile
-                                       string inputString = "fasta=" + fastaFileNames[s];
-                                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
-                                       m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-                                       
-                                       Command* uniqueCommand = new DeconvoluteCommand(inputString);
-                                       uniqueCommand->execute();
-                                       
-                                       map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
-                                       
-                                       delete uniqueCommand;
-                                       
-                                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
-                                       
-                                       nameFile = filenames["name"][0];
-                                       fastaFileNames[s] = filenames["fasta"][0];
-                               }
-                               
-                               //create input file for uchime
-                               //read through fastafile and store info
-                               map<string, string> seqs;
-                               ifstream in;
-                               m->openInputFile(fastaFileNames[s], in);
-                               
-                               while (!in.eof()) {
-                                       
-                                       if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0; }
-                                       
-                                       Sequence seq(in); m->gobble(in);
-                                       seqs[seq.getName()] = seq.getAligned();
-                               }
-                               in.close();
-                               
+                               }else { nameFile = getNamesFile(fastaFileNames[s]); }
+                                                                       
+                               map<string, string> seqs;  
+                               readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
+
                                //read namefile
                                vector<seqPriorityNode> nameMapCount;
-                               int error = m->readNames(nameFile, nameMapCount, seqs);
-                               
-                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
-                               
+                               int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
                                if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
                                if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
                                
-                               sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
+                               printFile(nameMapCount, newFasta);
+                               fastaFileNames[s] = newFasta;
+                       }
+                       
+                       if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                       
+                       if (groupFile != "") {
+                               if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
+                                       nameFile = nameFileNames[s];
+                               }else { nameFile = getNamesFile(fastaFileNames[s]); }
                                
-                               string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
-                               ofstream out;
-                               m->openOutputFile(newFasta, out);
+                               //Parse sequences by group
+                               SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
+                               vector<string> groups = parser.getNamesOfGroups();
+                                       
+                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
+                                                               
+                               //clears files
+                               ofstream out, out1, out2;
+                               m->openOutputFile(outputFileName, out); out.close(); 
+                               m->openOutputFile(accnosFileName, out1); out1.close();
+                               if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
+                               int totalSeqs = 0;
                                
-                               //print new file in order of
-                               for (int i = 0; i < nameMapCount.size(); i++) {
-                                       out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
-                               }
-                               out.close();
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
+                               else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups);                      }
+       #else
+                               totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);
+       #endif
+                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+
+                               int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
+                               
+                               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
+                               m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
                                
-                               fastaFileNames[s] = newFasta;
-                                               
-                               #ifdef USE_MPI  
-                                       }
-                               #endif
                                if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                       }
-                       
-                       if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
-                       string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
+                                       
+                       }else{
+                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
                        
-                       if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
+                               int numSeqs = 0;
+                               int numChimeras = 0;
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
+                               else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
+       #else
+                               numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras);
+       #endif
+                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
                        
-                       int numSeqs = 0;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
-                       else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
-#else
-                       numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName);
-#endif
-                       if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
+                               //remove file made for uchime
+                               if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
                        
-                       //remove file made for uchime
-                       if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
+                               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
+                       }
                        
                        outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
                        outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
                        if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
-                       
-                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
                }
-               
+       
                //set accnos file as new current accnosfile
                string current = "";
                itTypes = outputTypes.find("accnos");
@@ -518,8 +585,368 @@ int ChimeraUchimeCommand::execute(){
        }
 }
 //**********************************************************************************************************************
+int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
+       try {
+               map<string, string> uniqueNames = parser.getAllSeqsMap();
+               map<string, string>::iterator itUnique;
+               int total = 0;
+               
+               //edit chimera file
+               ifstream in; 
+               m->openInputFile(outputFileName, in);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               float temp1;
+               string name, rest, parent1, parent2;
+               set<string> namesInFile; //tracks names already written, so a sequence found to be chimeric in several samples is not written to the results file more than once
+               set<string>::iterator itNames;
+               
+               //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
+               /*
+                0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
+                0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
+               */
+               
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
+                       
+                       in >> temp1;    m->gobble(in);
+                       in >> name;             m->gobble(in);
+                       in >> parent1;  m->gobble(in);
+                       in >> parent2;  m->gobble(in);
+                       rest = m->getline(in); m->gobble(in);
+                       
+                       //parse name - name will look like U68590/ab=1/
+                       string restOfName = "";
+                       int pos = name.find_first_of('/');
+                       if (pos != string::npos) {
+                               restOfName = name.substr(pos);
+                               name = name.substr(0, pos);
+                       }
+                       
+                       //find unique name
+                       itUnique = uniqueNames.find(name);
+                       
+                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                       else {
+                               itNames = namesInFile.find((itUnique->second));
+                               
+                               if (itNames == namesInFile.end()) {
+                                       out << temp1 << '\t' << itUnique->second << restOfName << '\t';
+                                       namesInFile.insert((itUnique->second));
+                                       
+                                       //parse parent1 names
+                                       if (parent1 != "*") {
+                                               restOfName = "";
+                                               pos = parent1.find_first_of('/');
+                                               if (pos != string::npos) {
+                                                       restOfName = parent1.substr(pos);
+                                                       parent1 = parent1.substr(0, pos);
+                                               }
+                                               
+                                               itUnique = uniqueNames.find(parent1);
+                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else {
+                                                       out << itUnique->second << restOfName << '\t';
+                                               }
+                                       }else { out << parent1 << '\t'; }
+                                       
+                                       //parse parent2 names
+                                       if (parent2 != "*") {
+                                               restOfName = "";
+                                               pos = parent2.find_first_of('/');
+                                               if (pos != string::npos) {
+                                                       restOfName = parent2.substr(pos);
+                                                       parent2 = parent2.substr(0, pos);
+                                               }
+                                               
+                                               itUnique = uniqueNames.find(parent2);
+                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else {
+                                                       out << itUnique->second << restOfName << '\t';
+                                               }
+                                       }else { out << parent2 << '\t'; }
+                                       
+                                       out  << rest << endl;
+                               }
+                       }
+               }
+               in.close();
+               out.close();
+               
+               m->mothurRemove(outputFileName);
+               rename((outputFileName+".temp").c_str(), outputFileName.c_str());
+               
+               //edit accnos file
+               ifstream in2; 
+               m->openInputFile(accnosFileName, in2);
+               
+               ofstream out2;
+               m->openOutputFile(accnosFileName+".temp", out2);
+               
+               name = "";
+               namesInFile.clear();
+               
+               while (!in2.eof()) {
+                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
+                       
+                       in2 >> name; m->gobble(in2);
+                       
+                       //find unique name
+                       itUnique = uniqueNames.find(name);
+                       
+                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                       else {
+                               itNames = namesInFile.find((itUnique->second));
+       
+                               if (itNames == namesInFile.end()) {
+                                       out2 << itUnique->second << endl;
+                                       namesInFile.insert((itUnique->second));
+                                       total++;
+                               }
+                       }
+               }
+               in2.close();
+               out2.close();
+               
+               m->mothurRemove(accnosFileName);
+               rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
+               
+               //edit anls file
+               //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
+               /*
+                ------------------------------------------------------------------------
+                Query   (  179 nt) F21Fcsw_11639/ab=591/
+                ParentA (  179 nt) F11Fcsw_6529/ab=1625/
+                ParentB (  181 nt) F21Fcsw_12128/ab=1827/
+                
+                A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
+                Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
+                B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
+                Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
+                Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
+                Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+                
+                A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
+                Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
+                B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
+                Diffs      NNN     N N  N                   N  N BB    NNN                              
+                Votes      000     0 0  0                   0  0 ++    000                              
+                Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+                
+                A   159 TGGAACTGAGACACGGTCCAA 179
+                Q   159 TGGAACTGAGACACGGTCCAA 179
+                B   161 TGGAACTGAGACACGGTCCAA 181
+                Diffs                        
+                Votes                        
+                Model   BBBBBBBBBBBBBBBBBBBBB
+                
+                Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
+                Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
+               */
+               if (chimealns) {
+                       ifstream in3; 
+                       m->openInputFile(alnsFileName, in3);
+               
+                       ofstream out3;
+                       m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
+               
+                       name = "";
+                       namesInFile.clear();
+                       string line = "";
+                       
+                       while (!in3.eof()) {
+                               if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
+                               
+                               line = "";
+                               line = m->getline(in3); 
+                               string temp = "";
+                               
+                               if (line != "") {
+                                       istringstream iss(line);
+                                       iss >> temp;
+                                       
+                                       //are you a name line
+                                       if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
+                                               int spot = 0;
+                                               for (int i = 0; i < line.length(); i++) {
+                                                       spot = i;
+                                                       if (line[i] == ')') { break; }
+                                                       else { out3 << line[i]; }
+                                               }
+                                               
+                                               if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else {
+                                                       out << line[spot] << line[spot+1];
+                                                       
+                                                       name = line.substr(spot+2);
+                                                       
+                                                       //parse name - name will either look like U68590/ab=1/ or U68590
+                                                       string restOfName = "";
+                                                       int pos = name.find_first_of('/');
+                                                       if (pos != string::npos) {
+                                                               restOfName = name.substr(pos);
+                                                               name = name.substr(0, pos);
+                                                       }
+                                                       
+                                                       //find unique name
+                                                       itUnique = uniqueNames.find(name);
+                                                       
+                                                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
+                                                       else {
+                                                               //only limit repeats on query names
+                                                               if (temp == "Query") {
+                                                                       itNames = namesInFile.find((itUnique->second));
+                                                                       
+                                                                       if (itNames == namesInFile.end()) {
+                                                                               out << itUnique->second << restOfName << endl;
+                                                                               namesInFile.insert((itUnique->second));
+                                                                       }
+                                                               }else { out << itUnique->second << restOfName << endl;  }
+                                                       }
+                                                       
+                                               }
+                                               
+                                       }else { //not need to alter line
+                                               out3 << line << endl;
+                                       }
+                               }else { out3 << endl; }
+                       }
+                       in3.close();
+                       out3.close();
+                       
+                       m->mothurRemove(alnsFileName);
+                       rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
+               }
+               
+               return total;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+//sorts nameMapCount with compareSeqPriorityNodes (the caller's vector is reordered) and writes it to filename as fasta,
+//one record per node with a header of the form ">name/ab=numIdentical/". returns 0.
+int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
+       try {
+               //order the nodes first, the file is written in this order
+               sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
+               
+               ofstream fastaOut;
+               m->openOutputFile(filename, fastaOut);
+               
+               //header line with the abundance tag, then the sequence on its own line
+               vector<seqPriorityNode>::iterator itNode;
+               for (itNode = nameMapCount.begin(); itNode != nameMapCount.end(); itNode++) {
+                       fastaOut << ">" << itNode->name << "/ab=" << itNode->numIdentical << "/" << endl;
+                       fastaOut << itNode->seq << endl;
+               }
+               fastaOut.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraUchimeCommand", "printFile");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+//reads every sequence in the fasta file filename into seqs, keyed by getName() with getAligned() as the value.
+//a name that is read more than once keeps the value from its last record.
+//returns 0, including when control_pressed stops the read early.
+int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
+       try {
+               ifstream fastaIn;
+               m->openInputFile(filename, fastaIn);
+               
+               while (true) {
+                       if (fastaIn.eof()) { break; }
+                       
+                       //user asked to stop - seqs keeps whatever was loaded so far
+                       if (m->control_pressed) { fastaIn.close(); return 0; }
+                       
+                       Sequence current(fastaIn); m->gobble(fastaIn);
+                       seqs[current.getName()] = current.getAligned();
+               }
+               fastaIn.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
 
-int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){
+//runs the unique.seqs command (DeconvoluteCommand) on inputFile because no names file was supplied.
+//inputFile is passed by reference and is replaced by the first "fasta" output file of that command.
+//returns the first "name" output file of that command.
+string ChimeraUchimeCommand::getNamesFile(string& inputFile){
+       try {
+               m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
+               
+               //build the option string for unique.seqs and show the user what is being run
+               string uniqueOptions = "fasta=" + inputFile;
+               m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+               m->mothurOut("Running command: unique.seqs(" + uniqueOptions + ")"); m->mothurOutEndLine(); 
+               
+               //copy the output file list before the command object is deleted
+               Command* uniqueSeqs = new DeconvoluteCommand(uniqueOptions);
+               uniqueSeqs->execute();
+               map<string, vector<string> > outputFiles = uniqueSeqs->getOutputFiles();
+               delete uniqueSeqs;
+               
+               m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+               
+               //hand back the new names file and swap the caller's fasta file for the new one
+               string generatedNameFile = outputFiles["name"][0];
+               inputFile = outputFiles["fasta"][0];
+               
+               return generatedNameFile;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//runs driver() once for each group in groups[start, end) and appends each group's results to outputFName, accnos and (when chimealns is set) alns.
+//returns the sum of the values driver() returned, or 0 as soon as getSeqs returns 1 or control_pressed is set.
+int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
+       try {
+               
+               int numChimeras = 0;  //filled in by driver, not used further in this function
+               int totalSeqs = 0;
+               
+               for (int g = start; g < end; g++) {
+                       int groupStartTime = time(NULL);
+                       if (m->control_pressed) {  return 0; }
+                       
+                       //have the parser write this group's sequences to filename, the last argument is getSeqs' uchimeFormat flag
+                       int error = parser.getSeqs(groups[g], filename, true);
+                       if ((error == 1) || m->control_pressed) {  return 0; }
+                       
+                       //each group gets its own result files, named by appending the group name
+                       string groupOutput = outputFName + groups[g];
+                       string groupAccnos = accnos + groups[g];
+                       string groupAlns = alns + groups[g];
+                       
+                       int numSeqs = driver(groupOutput, filename, groupAccnos, groupAlns, numChimeras);
+                       totalSeqs += numSeqs;
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       //remove file made for uchime
+                       m->mothurRemove(filename);
+                       
+                       //add the group's files to the combined files, then remove them
+                       m->appendFiles(groupOutput, outputFName); m->mothurRemove(groupOutput);
+                       m->appendFiles(groupAccnos, accnos); m->mothurRemove(groupAccnos);
+                       if (chimealns) { m->appendFiles(groupAlns, alns); m->mothurRemove(groupAlns); }
+                       
+                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - groupStartTime) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[g] + ".");    m->mothurOutEndLine();                                  
+               }       
+               
+               return totalSeqs;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+
+int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
        try {
                
                vector<char*> cPara;
@@ -766,6 +1193,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                m->openOutputFile(accnos, out);
                
                int num = 0;
+               numChimeras = 0;
                while(!in.eof()) {
                        
                        if (m->control_pressed) { break; }
@@ -783,7 +1211,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                        for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
                        m->gobble(in);
                        
-                       if (chimeraFlag == "Y") {  out << name << endl; }
+                       if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
                        num++;
                }
                in.close();
@@ -798,7 +1226,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
 }
 /**************************************************************************************************/
 
-int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) {
+int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
        try {
                
                processIDS.clear();
@@ -810,49 +1238,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                m->divideFile(filename, processors, files);
                
                if (m->control_pressed) {  return 0;  }
-               
-#ifdef USE_MPI 
-               int pid, numSeqsPerProcessor; 
-               int tag = 2001;
-               
-               MPI_Status status; 
-               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
                                
-               if (pid == 0) { //you are the root process 
-                       num = driver(outputFileName, files[0], accnos, alns);
-                       
-                       if (templatefile != "self") {
-                               //wait on chidren
-                               for(int j = 1; j < processors; j++) { 
-                                       int temp;
-                                       MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
-                                       num += temp;
-                                       
-                                       m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
-                                       m->mothurRemove((outputFileName + toString(j) + ".temp"));
-                                       
-                                       m->appendFiles((accnos + toString(j) + ".temp"), accnos);
-                                       m->mothurRemove((accnos + toString(j) + ".temp"));
-                                       
-                                       if (chimealns) {
-                                               m->appendFiles((alns + toString(j) + ".temp"), alns);
-                                               m->mothurRemove((alns + toString(j) + ".temp"));
-                                       }
-                               }
-                       }
-               }else{ //you are a child process
-                       if (templatefile != "self") { //if template=self we can only use 1 processor
-                               num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp"); 
-                               
-                               //send numSeqs to parent
-                               MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
-                       }
-               }
-
-               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-#else
-               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -861,13 +1247,14 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp");
+                               num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
                                
                                //pass numSeqs to parent
                                ofstream out;
                                string tempFile = outputFileName + toString(getpid()) + ".num.temp";
                                m->openOutputFile(tempFile, out);
                                out << num << endl;
+                               out << numChimeras << endl;
                                out.close();
                                
                                exit(0);
@@ -879,7 +1266,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                }
                
                //do my part
-               num = driver(outputFileName, files[0], accnos, alns);
+               num = driver(outputFileName, files[0], accnos, alns, numChimeras);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -891,7 +1278,13 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                        ifstream in;
                        string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
-                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       if (!in.eof()) { 
+                               int tempNum = 0; 
+                               in >> tempNum; m->gobble(in);
+                               num += tempNum; 
+                               in >> tempNum;
+                               numChimeras += tempNum;
+                       }
                        in.close(); m->mothurRemove(tempFile);
                }
                
@@ -909,7 +1302,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                                m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
                        }
                }
-#endif         
+               
                //get rid of the file pieces.
                for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
 #endif         
@@ -920,6 +1313,95 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                exit(1);
        }
 }
+/**************************************************************************************************/
 
+//divides groups into 'processors' contiguous ranges and runs driverGroups on each range: range 0 in this process, the others in forked children.
+//each child writes to files named with its pid plus ".temp" and reports its count through a "<outputFName><pid>.num.temp" file.
+//after the children finish, their counts are added up and their output files are appended to outputFName, accnos and (when chimealns is set) alns.
+//returns the total of the driverGroups counts, or 0 when groups is empty.
+//NOTE(review): driverGroups is only called inside the platform #if below - on other platforms this function does no work, confirm the caller accounts for that.
+int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups) {
+       try {
+               
+               processIDS.clear();
+               int process = 1;
+               int num = 0;
+               
+               //nothing to do without groups - this also keeps processors from being set to 0 and then used as a divisor below
+               if (groups.empty()) { return num; }
+               
+               //sanity check
+               if (groups.size() < processors) { processors = groups.size(); }
+               
+               //divide the groups between the processors, the last processor also takes any remainder
+               vector<linePair> lines;
+               int numGroupsPerProcessor = groups.size() / processors;
+               for (int i = 0; i < processors; i++) {
+                       int startIndex =  i * numGroupsPerProcessor;
+                       int endIndex = (i+1) * numGroupsPerProcessor;
+                       if(i == (processors - 1)){      endIndex = groups.size();       }
+                       lines.push_back(linePair(startIndex, endIndex));
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+                               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = outputFName + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+#endif         
+               
+               //add the counts the children reported
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
+                       m->openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); m->mothurRemove(tempFile);
+               }
+               
+               
+               //append output files - one set per child, so the bound is the number of children and not a pid value
+               for (int i = 0; i < processIDS.size(); i++) {
+                       m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
+                       m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
+                       
+                       m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+                       m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+                       
+                       if (chimealns) {
+                               m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
+                               m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
+                       }
+               }
+               
+               return num;     
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 
index 8478117f3c99cd2f31224e614d3a95943fc1b4b0..1f86a98973ba8c88c20de4678866a70db35efe05 100644 (file)
@@ -13,6 +13,7 @@
 
 #include "mothur.h"
 #include "command.hpp"
+#include "sequenceparser.h"
 
 /***********************************************************/
 
@@ -33,18 +34,33 @@ public:
        void help() { m->mothurOut(getHelpString()); }          
        
 private:
+       //pair of indexes handed to one process: createProcessesGroups stores a start and end group index in it
+       struct linePair {
+               int start;
+               int end;
+               
+               linePair(int startIndex, int endIndex)
+                       : start(startIndex), end(endIndex) {}
+       };
+       
        vector<int> processIDS;   //processid
-       int driver(string, string, string, string);
-       int createProcesses(string, string, string, string);
+       int driver(string, string, string, string, int&);
+       int createProcesses(string, string, string, string, int&);
                
        bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract;
-       string fastafile, templatefile, outputDir, namefile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
+       string fastafile, groupfile, templatefile, outputDir, namefile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
        int processors;
        
        vector<string> outputNames;
        vector<string> fastaFileNames;
        vector<string> nameFileNames;
+       vector<string> groupFileNames;
        
+       string getNamesFile(string&);
+       int readFasta(string, map<string, string>&);
+       int printFile(vector<seqPriorityNode>&, string);
+       int deconvoluteResults(SequenceParser&, string, string, string);
+       int driverGroups(SequenceParser&, string, string, string, string, int, int, vector<string>);
+       int createProcessesGroups(SequenceParser&, string, string, string, string, vector<string>);
+
+
 };
 
 /***********************************************************/
index dcd811fd6669e2d1ca8d6902d2c942d758ba23b4..8e61e6e7ecef2bf38ea19ddb9171312bae3ade7b 100644 (file)
@@ -256,7 +256,7 @@ CommandFactory::CommandFactory(){
        commands["chimera.ccode"]               = "MPIEnabled";
        commands["chimera.check"]               = "MPIEnabled";
        commands["chimera.slayer"]              = "MPIEnabled";
-       commands["chimera.uchime"]              = "MPIEnabled";
+       commands["chimera.uchime"]              = "chimera.uchime";
        commands["chimera.pintail"]             = "MPIEnabled";
        commands["chimera.bellerophon"] = "MPIEnabled";
        commands["screen.seqs"]                 = "MPIEnabled";
index 67b2f31bebe6185fef4137d9c5e839a35b23f0b8..74eddbee75d7021b3ee8584da6b8236df52eca4c 100644 (file)
@@ -235,6 +235,8 @@ int PreClusterCommand::execute(){
                                
                        }
                        
+                       delete parser;
+                       
                        //run unique.seqs for deconvolute results
                        string inputString = "fasta=" + newFastaFile;
                        if (namefile != "") { inputString += ", name=" + newNamesFile; }
index 44012d8ba8321fea520fbe613e0dac8f8911c760..e60f19b0df0034de0c1a693d7c5a55c59ef0ef82 100644 (file)
@@ -106,6 +106,8 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi
                                                }
                                        }
                                }
+                               
+                               allSeqsMap[names[i]] = names[0];
                        }
                        
                        
@@ -247,6 +249,78 @@ vector<Sequence> SequenceParser::getSeqs(string g){
                exit(1);
        }
 }
+/************************************************************/
+int SequenceParser::getSeqs(string g, string filename, bool uchimeFormat=false){ 
+       try {
+               map<string, vector<Sequence> >::iterator it;
+               vector<Sequence> seqForThisGroup;
+               vector<seqPriorityNode> nameVector;
+               
+               it = seqs.find(g);
+               if(it == seqs.end()) {
+                       m->mothurOut("[ERROR]: No sequences available for group " + g + ", please correct."); m->mothurOutEndLine();
+               }else {
+                       
+                       ofstream out;
+                       m->openOutputFile(filename, out);
+                       
+                       seqForThisGroup = it->second;
+                       
+                       if (uchimeFormat) {
+                               // format should look like 
+                               //>seqName /ab=numRedundantSeqs/
+                               //sequence
+                               
+                               map<string, string> nameMapForThisGroup = getNameMap(g);
+                               map<string, string>::iterator itNameMap;
+                               int error = 0;
+                               
+                               for (int i = 0; i < seqForThisGroup.size(); i++) {
+                                       itNameMap = nameMapForThisGroup.find(seqForThisGroup[i].getName());
+                                       
+                                       if (itNameMap == nameMapForThisGroup.end()){
+                                               error = 1;
+                                               m->mothurOut("[ERROR]: " + seqForThisGroup[i].getName() + " is in your fastafile, but is not in your namesfile, please correct."); m->mothurOutEndLine();
+                                       }else {
+                                               int num = m->getNumNames(itNameMap->second);
+                                               
+                                               seqPriorityNode temp(num, seqForThisGroup[i].getAligned(), seqForThisGroup[i].getName());
+                                               nameVector.push_back(temp);
+                                       }
+                               }
+                               
+                               if (error == 1) { out.close(); m->mothurRemove(filename); return 1; }
+                               
+                               //sort by num represented
+                               sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes);
+
+                               //print new file in order of
+                               for (int i = 0; i < nameVector.size(); i++) {
+                                       
+                                       if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
+                                       
+                                       out << ">" << nameVector[i].name  << "/ab=" << nameVector[i].numIdentical << "/" << endl << nameVector[i].seq << endl;
+                               }
+                               
+                       }else { 
+                               for (int i = 0; i < seqForThisGroup.size(); i++) {
+                                       
+                                       if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
+                                       
+                                       seqForThisGroup[i].printSequence(out);  
+                               }
+                       }
+                       out.close();
+               }
+               
+               return 0; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SequenceParser", "getSeqs");
+               exit(1);
+       }
+}
+
 /************************************************************/
 map<string, string> SequenceParser::getNameMap(string g){ 
        try {
@@ -268,6 +342,38 @@ map<string, string> SequenceParser::getNameMap(string g){
        }
 }
 /************************************************************/
+int SequenceParser::getNameMap(string g, string filename){ 
+       try {
+               map<string, map<string, string> >::iterator it;
+               map<string, string> nameMapForThisGroup;
+               
+               it = nameMapPerGroup.find(g);
+               if(it == nameMapPerGroup.end()) {
+                       m->mothurOut("[ERROR]: No nameMap available for group " + g + ", please correct."); m->mothurOutEndLine();
+               }else {
+                       nameMapForThisGroup = it->second;
+                       
+                       ofstream out;
+                       m->openOutputFile(filename, out);
+                       
+                       for (map<string, string>::iterator itFile = nameMapForThisGroup.begin(); itFile != nameMapForThisGroup.end(); itFile++) {
+                               
+                               if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
+                               
+                               out << itFile->first << '\t' << itFile->second << endl;
+                       }
+                       
+                       out.close();
+               }
+               
+               return 0; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SequenceParser", "getNameMap");
+               exit(1);
+       }
+}
+/************************************************************/
 
 
 
index fa838f0c2adf185eb76472479f47fce786882fc0..23fcb9ecc983e1cf59ef1fde1f976da80773a152 100644 (file)
@@ -42,12 +42,17 @@ class SequenceParser {
                vector<Sequence> getSeqs(string); //returns unique sequences in a specific group
                map<string, string> getNameMap(string); //returns seqName -> namesOfRedundantSeqs separated by commas for a specific group - the name file format, but each line is parsed by group.
                
+               int getSeqs(string, string, bool); //prints unique sequences in a specific group to a file - group, filename, uchimeFormat=false
+               int getNameMap(string, string); //print seqName -> namesOfRedundantSeqs separated by commas for a specific group - group, filename
+               
+               map<string, string> getAllSeqsMap(){ return allSeqsMap; }  //returns a copy of the map where key=sequenceName and value=representativeSequence (filled in the constructor via allSeqsMap[names[i]] = names[0]) - helps us remove duplicates after group by group processing
        private:
        
                GroupMap* groupMap;
                MothurOut* m;
        
                int numSeqs;
+               map<string, string> allSeqsMap;
                map<string, vector<Sequence> > seqs; //a vector for each group
                map<string, map<string, string> > nameMapPerGroup; //nameMap for each group
 };