]> git.donarmstrong.com Git - mothur.git/commitdiff
finished shhh.seqs command, fixed bug with remove.groups and get.groups that caused...
authorwestcott <westcott>
Mon, 14 Nov 2011 20:28:59 +0000 (20:28 +0000)
committerwestcott <westcott>
Mon, 14 Nov 2011 20:28:59 +0000 (20:28 +0000)
12 files changed:
chimerauchimecommand.h
getgroupscommand.cpp
getgroupscommand.h
removegroupscommand.cpp
removegroupscommand.h
screenseqscommand.cpp
screenseqscommand.h
sequence.cpp
sequence.hpp
shhhseqscommand.cpp
shhhseqscommand.h
trimflowscommand.cpp

index 6e1f809ce55a332020240bdc0b5e30398b5d5b49..f6e0ab55d5cff328c4a6a247ddb2b72e1ee26df6 100644 (file)
@@ -181,7 +181,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                        
                        string path = pDataArray->m->argv;
                        string tempPath = path;
-                       for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
+                       for (int j = 0; j < path.length(); j++) { tempPath[j] = tolower(path[j]); }
                        path = path.substr(0, (tempPath.find_last_of('m')));
                        
                        string uchimeCommand = path;
index e8f42dda532fab7f252de8c95e220c8eb27fa8f8..e533bc47defa9467b345a0a58cc8b80b59710275 100644 (file)
@@ -384,6 +384,15 @@ int GetGroupsCommand::readFasta(){
                                        
                                        currSeq.printSequence(out);
                                        selectedCount++;
+                               }else{
+                                       //if you are not in the accnos file check if you are a name that needs to be changed
+                                       map<string, string>::iterator it = uniqueToRedundant.find(name);
+                                       if (it != uniqueToRedundant.end()) {
+                                               wroteSomething = true;
+                                               currSeq.setName(it->second);
+                                               currSeq.printSequence(out);
+                                               selectedCount++;
+                                       }
                                }
                        }
                        m->gobble(in);
@@ -499,10 +508,26 @@ int GetGroupsCommand::readList(){
                                        
                                        //if that name is in the .accnos file, add it
                                        if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++;  }
+                                       else{
+                                               //if you are not in the accnos file check if you are a name that needs to be changed
+                                               map<string, string>::iterator it = uniqueToRedundant.find(name);
+                                               if (it != uniqueToRedundant.end()) {
+                                                       newNames += it->second + ",";
+                                                       selectedCount++;
+                                               }
+                                       }
                                }
                                
                                //get last name
                                if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++;  }
+                               else{
+                                       //if you are not in the accnos file check if you are a name that needs to be changed
+                                       map<string, string>::iterator it = uniqueToRedundant.find(binnames);
+                                       if (it != uniqueToRedundant.end()) {
+                                               newNames += it->second + ",";
+                                               selectedCount++;
+                                       }
+                               }
                                
                                //if there are names in this bin add to new list
                                if (newNames != "") {  
@@ -594,6 +619,7 @@ int GetGroupsCommand::readName(){
                                        //you know you have at least one valid second since first column is valid
                                        for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
                                        out << validSecond[validSecond.size()-1] << endl;
+                                       uniqueToRedundant[firstCol] = validSecond[0];
                                }
                        }
                        
@@ -687,6 +713,13 @@ int GetGroupsCommand::readTax(){
                        if (names.count(name) != 0) {
                                wroteSomething = true;
                                out << name << '\t' << tax << endl;
+                       }else{
+                               //if you are not in the accnos file check if you are a name that needs to be changed
+                               map<string, string>::iterator it = uniqueToRedundant.find(name);
+                               if (it != uniqueToRedundant.end()) {
+                                       wroteSomething = true;
+                                       out << it->second << '\t' << tax << endl;
+                               }
                        }
                        
                        m->gobble(in);
index 6d766fb5ebab9f3910e998180bfa92003ca8a68f..e0fbad80064157e99c3ff21fb97ddfd95bc97317 100644 (file)
@@ -36,6 +36,9 @@ public:
        
 private:
        set<string> names;
+       map<string, string> uniqueToRedundant; //if a namefile is given and the first column name is not selected
+                                                                                  //then the other files need to change the unique name in their file to match.
+                                                                                  //only add the names that need to be changed to keep the map search quick
        string accnosfile, fastafile, namefile, groupfile, listfile, taxfile, outputDir, groups, sharedfile;
        bool abort;
        vector<string> outputNames, Groups;
index 725efafebce46296da2d9fd60a367ddc44d18fc2..6ec0fcd34e61500633ecdec6e921ae987fe19e31 100644 (file)
@@ -379,9 +379,16 @@ int RemoveGroupsCommand::readFasta(){
                                //if this name is in the accnos file
                                if (names.count(name) == 0) {
                                        wroteSomething = true;
-                                       
-                                       currSeq.printSequence(out);
-                               }else { removedCount++; }
+                                       currSeq.printSequence(out); 
+                               }else { 
+                                       //if you are not in the accnos file check if you are a name that needs to be changed
+                                       map<string, string>::iterator it = uniqueToRedundant.find(name);
+                                       if (it != uniqueToRedundant.end()) {
+                                               wroteSomething = true;
+                                               currSeq.setName(it->second);
+                                               currSeq.printSequence(out);
+                                       }else { removedCount++; }
+                               }
                        }
                        m->gobble(in);
                }
@@ -527,12 +534,23 @@ int RemoveGroupsCommand::readList(){
                                        
                                        //if that name is in the .accnos file, add it
                                        if (names.count(name) == 0) {  newNames += name + ",";  }
-                                       else { removedCount++; }
+                                       else {
+                                               //if you are not in the accnos file check if you are a name that needs to be changed
+                                               map<string, string>::iterator it = uniqueToRedundant.find(name);
+                                               if (it != uniqueToRedundant.end()) {
+                                                       newNames += it->second + ",";
+                                               }else { removedCount++; }
+                                       }
                                }
                                
                                //get last name
                                if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
-                               else { removedCount++; }
+                               else { //if you are not in the accnos file check if you are a name that needs to be changed
+                                       map<string, string>::iterator it = uniqueToRedundant.find(binnames);
+                                       if (it != uniqueToRedundant.end()) {
+                                               newNames += it->second + ",";
+                                       }else { removedCount++; }
+                               }
                                
                                //if there are names in this bin add to new list
                                if (newNames != "") {  
@@ -624,6 +642,7 @@ int RemoveGroupsCommand::readName(){
                                        //you know you have at least one valid second since first column is valid
                                        for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
                                        out << validSecond[validSecond.size()-1] << endl;
+                                       uniqueToRedundant[firstCol] = validSecond[0];
                                }
                        }
                        
@@ -718,7 +737,12 @@ int RemoveGroupsCommand::readTax(){
                        if (names.count(name) == 0) {
                                wroteSomething = true;
                                out << name << '\t' << tax << endl;
-                       }else {  removedCount++;  }
+                       }else {  //if you are not in the accnos file check if you are a name that needs to be changed
+                               map<string, string>::iterator it = uniqueToRedundant.find(name);
+                               if (it != uniqueToRedundant.end()) {
+                                       wroteSomething = true;
+                                       out << it->second << '\t' << tax << endl;
+                               }else { removedCount++; }  }
                        
                        m->gobble(in);
                }
index 63c0bff316ceda8770afdf19c90bb8f1feed7e6e..103ab66197e7ecd2ea447430a0edfba3eac3c717 100644 (file)
@@ -39,6 +39,10 @@ private:
        bool abort;
        vector<string> outputNames, Groups;
        GroupMap* groupMap;
+       map<string, string> uniqueToRedundant; //if a namefile is given and the first column name is not selected
+       //then the other files need to change the unique name in their file to match.
+       //only add the names that need to be changed to keep the map search quick
+       
        
        int readFasta();
        int readShared();
index d5f2a6c20126dc2a6d8d9d15e0e5d7a15c2ef8bc..cedb74a9b6c7f0a4162cdc96d9b774105e6cc1d0 100644 (file)
@@ -18,6 +18,7 @@ vector<string> ScreenSeqsCommand::setParameters(){
                CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport);
+               CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
                CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
                CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
                CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
@@ -44,8 +45,9 @@ string ScreenSeqsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The screen.seqs command reads a fastafile and creates .....\n";
-               helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, optimize, criteria and processors.\n";
+               helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
                helpString += "The fasta parameter is required.\n";
+               helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
                helpString += "The start parameter .... The default is -1.\n";
                helpString += "The end parameter .... The default is -1.\n";
                helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
@@ -80,6 +82,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(){
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["accnos"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+               outputTypes["taxonomy"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
@@ -118,6 +121,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["accnos"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
+                       outputTypes["taxonomy"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -164,6 +168,13 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
                                
+                               it = parameters.find("taxonomy");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
+                               }
                        }
 
                        //check for required parameters
@@ -193,7 +204,11 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        
                        alignreport = validParameter.validFile(parameters, "alignreport", true);
                        if (alignreport == "not open") { abort = true; }
-                       else if (alignreport == "not found") { alignreport = ""; }      
+                       else if (alignreport == "not found") { alignreport = ""; }
+                       
+                       taxonomy = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxonomy == "not open") { abort = true; }
+                       else if (taxonomy == "not found") { taxonomy = ""; }    
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -483,6 +498,7 @@ int ScreenSeqsCommand::execute(){
 
                if(alignreport != "")                                   {       screenAlignReport(badSeqNames);         }
                if(qualfile != "")                                              {       screenQual(badSeqNames);                        }
+               if(taxonomy != "")                                              {       screenTaxonomy(badSeqNames);            }
                
                if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
                
@@ -519,6 +535,11 @@ int ScreenSeqsCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
                }
+               
+               itTypes = outputTypes.find("taxonomy");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
+               }
 
                m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
                m->mothurOutEndLine();
@@ -644,6 +665,16 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                        lines.push_back(new linePair(positions[i], positions[(i+1)]));
                }       
                
+               
+#ifdef USE_MPI
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               
+               if (pid == 0) { //only one process should fix files
+                       driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
+               }
+               
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
+#else
                int numSeqs = 0;
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
@@ -657,7 +688,7 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                        numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
                        if (m->control_pressed) {  return 0; }
                #endif
-
+#endif
                sort(startPosition.begin(), startPosition.end());
                sort(endPosition.begin(), endPosition.end());
                sort(seqLength.begin(), seqLength.end());
@@ -936,6 +967,56 @@ int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
 }
 //***************************************************************************************************************
 
+int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
+       try {
+               ifstream input;
+               m->openInputFile(taxonomy, input);
+               string seqName, tax;
+               set<string>::iterator it;
+               
+               string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + "good" + m->getExtension(taxonomy);
+               outputNames.push_back(goodTaxFile);  outputTypes["taxonomy"].push_back(goodTaxFile);
+               ofstream goodTaxOut;    m->openOutputFile(goodTaxFile, goodTaxOut);
+                               
+               while(!input.eof()){
+                       if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
+                       
+                       input >> seqName >> tax;
+                       it = badSeqNames.find(seqName);
+                       
+                       if(it != badSeqNames.end()){ badSeqNames.erase(it); }
+                       else{
+                               goodTaxOut << seqName << '\t' << tax << endl;
+                       }
+                       m->gobble(input);
+               }
+               
+               if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
+               
+               //we were unable to remove some of the bad sequences
+               if (badSeqNames.size() != 0) {
+                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                               m->mothurOut("Your taxonomy file does not include the sequence " + *it + " please correct."); 
+                               m->mothurOutEndLine();
+                       }
+               }
+               
+               input.close();
+               goodTaxOut.close();
+               
+               if (m->control_pressed) {  m->mothurRemove(goodTaxFile);  return 0; }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenTaxonomy");
+               exit(1);
+       }
+       
+}
+//***************************************************************************************************************
+
 int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
        try {
                ifstream in;
index 87c8beef870b82a95f79bfddcdacba8ab2a1d47e..49d992ac193c7aa42ad4492c9190bda40671fc0b 100644 (file)
@@ -45,6 +45,7 @@ private:
        int screenGroupFile(set<string>);
        int screenAlignReport(set<string>);
        int screenQual(set<string>);
+       int screenTaxonomy(set<string>);
        
        int driver(linePair*, string, string, string, set<string>&);
        int createProcesses(string, string, string, set<string>&);
@@ -54,7 +55,7 @@ private:
        #endif
 
        bool abort;
-       string fastafile, namefile, groupfile, alignreport, outputDir, qualfile;
+       string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy;
        int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
        vector<string> outputNames;
        vector<string> optimize;
index 69e905e6bc93e401a242f695577b95019be191c3..162d3be9e69d0f86f7e4f2122c3c914b613392c3 100644 (file)
@@ -76,10 +76,15 @@ Sequence::Sequence(istringstream& fastaString){
                        
                        while (!fastaString.eof())      {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaString);              
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaString, numAmbig);
+                       
                        setAligned(sequence);   
                        //setUnaligned removes any gap characters for us                                                
-                       setUnaligned(sequence);         
+                       setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+               
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
                
        }
@@ -118,10 +123,14 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
                        
                        while (!fastaString.eof())      {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaString);              
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaString, numAmbig);
                        
                        //setUnaligned removes any gap characters for us                                                
-                       setUnaligned(sequence);         
+                       setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
                
        }
@@ -163,11 +172,15 @@ Sequence::Sequence(ifstream& fastaFile){
                        //read real sequence
                        while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){  break;      }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaFile);                
-       
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaFile, numAmbig);
+                       
                        setAligned(sequence);   
                        //setUnaligned removes any gap characters for us                                                
                        setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
 
        }
@@ -205,10 +218,14 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
                        //read real sequence
                        while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){       break; }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaFile);                
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaFile, numAmbig);
                        
                        //setUnaligned removes any gap characters for us                                                
                        setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
                
        }
@@ -219,10 +236,11 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
 }
 
 //********************************************************************************************************************
-string Sequence::getSequenceString(ifstream& fastaFile) {
+string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
        try {
                char letter;
                string sequence = "";   
+               numAmbig = 0;
                
                while(fastaFile){
                        letter= fastaFile.get();
@@ -233,8 +251,9 @@ string Sequence::getSequenceString(ifstream& fastaFile) {
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
-                               if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G'  && letter != 'C'){
+                               if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G'  && letter != 'C' && letter != 'N'){
                                        letter = 'N';
+                                       numAmbig++;
                                }
                                sequence += letter;
                        }
@@ -270,10 +289,11 @@ string Sequence::getCommentString(ifstream& fastaFile) {
        }
 }
 //********************************************************************************************************************
-string Sequence::getSequenceString(istringstream& fastaFile) {
+string Sequence::getSequenceString(istringstream& fastaFile, int& numAmbig) {
        try {
                char letter;
-               string sequence = "";   
+               string sequence = "";
+               numAmbig = 0;
                
                while(!fastaFile.eof()){
                        letter= fastaFile.get();
@@ -285,6 +305,10 @@ string Sequence::getSequenceString(istringstream& fastaFile) {
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
+                               if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G'  && letter != 'C' && letter != 'N'){
+                                       letter = 'N';
+                                       numAmbig++;
+                               }
                                sequence += letter;
                        }
                }
index 224ae5d7a7452172c3aa3be1e63906e0a876efe2..6a50cb0b8aa31e4a3a3b6c9c97e77416f53eeeb1 100644 (file)
@@ -65,9 +65,9 @@ public:
 private:
        MothurOut* m;
        void initialize();
-       string getSequenceString(ifstream&);
+       string getSequenceString(ifstream&, int&);
        string getCommentString(ifstream&);
-       string getSequenceString(istringstream&);
+       string getSequenceString(istringstream&, int&);
        string getCommentString(istringstream&);
        string name;
        string unaligned;
index 625b93922c15a72096b2a9bb1550185f14dc579e..8ef4c8c24f6d7a6398631591f0e7a2a34d7965c1 100644 (file)
@@ -196,13 +196,16 @@ int ShhhSeqsCommand::execute() {
                        m->openOutputFile(nameFileName, out1); out1.close();
                        mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.";
                        
-                       if(processors == 1)     {       driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups);      }
-                       else                            {       createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups);                       }
+                       vector<string> mapFileNames;
+                       if(processors == 1)     {       mapFileNames = driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups);       }
+                       else                            {       mapFileNames = createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups);                        }
                        
-                       if (m->control_pressed) {    return 0;  }                               
+                       if (m->control_pressed) {    return 0;  }       
                        
-                       //deconvolute results by running unique.seqs
+                       for (int j = 0; j < mapFileNames.size(); j++) { outputNames.push_back(mapFileNames[j]); outputTypes["map"].push_back(mapFileNames[j]); }
                        
+                       //deconvolute results by running unique.seqs
+                       deconvoluteResults(outputFileName, nameFileName);
                        
                        if (m->control_pressed) {   return 0;   }                               
                        
@@ -227,13 +230,13 @@ int ShhhSeqsCommand::execute() {
                        if (m->control_pressed) { m->mothurRemove(distFileName); return 0; }
                        
                        driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, outputFileName, nameFileName, mapFileName); 
+                       outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName);
                }
                
                if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
                
                outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
                outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
-               outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName);
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -335,11 +338,12 @@ int ShhhSeqsCommand::loadData(correctDist* correct, seqNoise& noise, vector<stri
        }
 }
 /**************************************************************************************************/
-int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector<string> groups) {
+vector<string> ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector<string> groups) {
        try {
                
                vector<int> processIDS;
                int process = 1;
+               vector<string> mapfileNames;
                
                //sanity check
                if (groups.size() < processors) { processors = groups.size(); }
@@ -364,7 +368,18 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups);
+                               mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups);
+                               
+                               //pass filenames to parent
+                               ofstream out;
+                               string tempFile = newMName + toString(getpid()) + ".temp";
+                               m->openOutputFile(tempFile, out);
+                               out << mapfileNames.size() << endl;
+                               for (int i = 0; i < mapfileNames.size(); i++) {
+                                       out << mapfileNames[i] << endl;
+                               }
+                               out.close();
+                               
                                exit(0);
                        }else { 
                                m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
@@ -374,7 +389,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                }
                
                //do my part
-               driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
+               mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -382,6 +397,22 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                        wait(&temp);
                }
                
+               //append output files
+               for(int i=0;i<processIDS.size();i++){
+                       ifstream in;
+                       string tempFile =  newMName + toString(processIDS[i]) + ".temp";
+                       m->openInputFile(tempFile, in);
+                       if (!in.eof()) { 
+                               int tempNum = 0; in >> tempNum;  m->gobble(in);
+                               for (int j = 0; j < tempNum; j++) {
+                                       string filename;
+                                       in >> filename; m->gobble(in);
+                                       mapfileNames.push_back(filename);
+                               }
+                       }
+                       in.close(); m->mothurRemove(tempFile);
+                       
+               }
 #else
                
                //////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -397,7 +428,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                for( int i=1; i<processors; i++ ){
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
-                       
+
                        shhhseqsData* tempShhhseqs = new shhhseqsData(fastafile, namefile, groupfile, (newFName+extension), (newNName+extension), newMName, groups, m, lines[i].start, lines[i].end, sigma, i);
                        pDataArray.push_back(tempShhhseqs);
                        processIDS.push_back(i);
@@ -409,13 +440,16 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                
                
                //using the main process as a worker saves time and memory
-               driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
+               mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
                
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
+                       for (int j = 0; j < pDataArray[i]->mapfileNames.size(); j++) {
+                               mapfileNames.push_back(pDataArray[i]->mapfileNames[j]);
+                       }
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
@@ -431,7 +465,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                        m->mothurRemove((newNName + toString(processIDS[i]) + ".temp"));
                }
                
-               return 0;       
+               return mapfileNames;    
                
        }
        catch(exception& e) {
@@ -440,14 +474,16 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
        }
 }
 /**************************************************************************************************/
-int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
+vector<string> ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
        try {
                
+               vector<string> mapFileNames;
+               
                for (int i = start; i < end; i++) {
                        
                        start = time(NULL);
                        
-                       if (m->control_pressed) {  return 0; }
+                       if (m->control_pressed) {  return mapFileNames; }
                        
                        m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
                        
@@ -465,26 +501,27 @@ int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, strin
                        
                        //load this groups info in order
                        loadData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs);
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { return mapFileNames; }
                        
                        //calc distances for cluster
                        string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + groups[i] + ".shhh.dist";
                        correct->execute(distFileName);
                        delete correct;
                        
-                       if (m->control_pressed) { m->mothurRemove(distFileName); return 0; }
+                       if (m->control_pressed) { m->mothurRemove(distFileName); return mapFileNames; }
                        
                        driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, newFFile+groups[i], newNFile+groups[i], newMFile+groups[i]+".map"); 
                        
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { return mapFileNames; }
                        
                        m->appendFiles(newFFile+groups[i], newFFile); m->mothurRemove(newFFile+groups[i]);
                        m->appendFiles(newNFile+groups[i], newNFile); m->mothurRemove(newNFile+groups[i]);
+                       mapFileNames.push_back(newMFile+groups[i]+".map");
                        
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + groups[i] + "."); m->mothurOutEndLine(); 
                }
                
-               return 0;
+               return mapFileNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhhSeqsCommand", "driverGroups");
@@ -666,7 +703,7 @@ int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){
                string newfastaFile = filenames["fasta"][0];
                
                m->mothurRemove(fastaFile); rename(newfastaFile.c_str(), fastaFile.c_str()); 
-               m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); 
+               if (nameFile != newnameFile) { m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); }
                
                return 0;
        }
index 5aceca8888e959c4ac4e511b15f954e7564c40c5..1b0211afba43e5577abce79f89704114b3705b2f 100644 (file)
@@ -55,8 +55,8 @@ private:
        int loadData(correctDist*, seqNoise&, vector<string>&, vector<string>&, vector<string>&, vector<int>&, map<string, string>&, vector<Sequence>&);
 
        int driver(seqNoise&, vector<string>&, vector<string>&, vector<string>&, vector<int>&, string, string, string, string);
-       int driverGroups(SequenceParser&, string, string, string, int, int, vector<string>);
-       int createProcessesGroups(SequenceParser&, string, string, string, vector<string>);
+       vector<string> driverGroups(SequenceParser&, string, string, string, int, int, vector<string>);
+       vector<string> createProcessesGroups(SequenceParser&, string, string, string, vector<string>);
        int deconvoluteResults(string, string);
 
 
@@ -77,6 +77,7 @@ struct shhhseqsData {
        int end;
        int sigma, threadID;
        vector<string> groups;
+       vector<string> mapfileNames;
        
        shhhseqsData(){}
        shhhseqsData(string f, string n, string g, string nff,  string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int s, int tid) {
@@ -85,7 +86,7 @@ struct shhhseqsData {
                groupfile = g;
                newFName = nff;
                newNName = nnf;
-               newNName = nmf;
+               newMName = nmf;
                m = mout;
                start = st;
                end = en;
@@ -115,7 +116,7 @@ static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){
                        if (pDataArray->m->control_pressed) {  return 0; }
                        
                        pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Processing group " + pDataArray->groups[k] + ":"); pDataArray->m->mothurOutEndLine();
-                       
+
                        map<string, string> thisNameMap;
                        thisNameMap = parser.getNameMap(pDataArray->groups[k]); 
                        vector<Sequence> thisSeqs = parser.getSeqs(pDataArray->groups[k]);
@@ -311,10 +312,9 @@ static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){
                        
                        pDataArray->m->appendFiles(pDataArray->newFName+pDataArray->groups[k], pDataArray->newFName); pDataArray->m->mothurRemove(pDataArray->newFName+pDataArray->groups[k]);
                        pDataArray->m->appendFiles(pDataArray->newNName+pDataArray->groups[k], pDataArray->newNName); pDataArray->m->mothurRemove(pDataArray->newNName+pDataArray->groups[k]);
+                       pDataArray->mapfileNames.push_back(pDataArray->newMName+pDataArray->groups[k]+".map");
                        
                        pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + pDataArray->groups[k] + "."); pDataArray->m->mothurOutEndLine(); 
-                       
-                                                       
                }
                
                return 0;
@@ -322,7 +322,7 @@ static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){
                
        }
        catch(exception& e) {
-               pDataArray->m->errorOut(e, "PreClusterCommand", "MyPreclusterThreadFunction");
+               pDataArray->m->errorOut(e, "ShhhSeqsCommand", "MyShhhSeqsThreadFunction");
                exit(1);
        }
 } 
index 7a1af627a5e290c6d348e4a3783c645ba575982c..8be4a6ce260f9cd3ffed1bd3833c54ac9c14a939 100644 (file)
@@ -17,8 +17,8 @@ vector<string> TrimFlowsCommand::setParameters(){
                CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow);
                CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
-               CommandParameter pmaxflows("maxflows", "Number", "", "720", "", "", "",false,false); parameters.push_back(pmaxflows);
-               CommandParameter pminflows("minflows", "Number", "", "360", "", "", "",false,false); parameters.push_back(pminflows);
+               CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows);
+               CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
                CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);