]> git.donarmstrong.com Git - mothur.git/commitdiff
added design and sets parameters to the rarefaction.shared command. finished work...
authorSarah Westcott <mothur.westcott@gmail.com>
Wed, 23 May 2012 18:33:47 +0000 (14:33 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Wed, 23 May 2012 18:33:47 +0000 (14:33 -0400)
chimeraslayercommand.cpp
classifyseqscommand.cpp
groupmap.cpp
groupmap.h
indicatorcommand.cpp
rarefactsharedcommand.cpp
rarefactsharedcommand.h
shhhercommand.cpp

index 2c435cadb11c1bc413b982676ac58082d4683992..e2b93316a4ade033f6ce4db3988cfd09cbf432e9 100644 (file)
@@ -633,6 +633,7 @@ int ChimeraSlayerCommand::execute(){
 #endif
                                
                                totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName);
+                m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine();
 #ifdef USE_MPI 
                                }
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
@@ -641,7 +642,7 @@ int ChimeraSlayerCommand::execute(){
        
                        if (parser != NULL) { delete parser; } 
                        
-                       m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");       m->mothurOutEndLine();
+            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");   m->mothurOutEndLine();
                }
                
                //set accnos file as new current accnosfile
index 0504e6635802c3f6e3e186e4568675906774b3ff..4e244905ff6b755fd064617368506b9160770048 100644 (file)
@@ -467,7 +467,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
             }
                        
                }
-               
        }
        catch(exception& e) {
                m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
@@ -486,7 +485,7 @@ ClassifySeqsCommand::~ClassifySeqsCommand(){
 int ClassifySeqsCommand::execute(){
        try {
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-               
+        
                if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);             }
                else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
                else {
@@ -507,8 +506,8 @@ int ClassifySeqsCommand::execute(){
                        string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
                        RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
                        if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
-                       RippedTaxName +=  "."; 
-               
+                       if (RippedTaxName != "") { RippedTaxName +=  "."; }
+           
                        if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
                        string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
                        string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
index 481fd1decfc4516eb5e1cf618c6b4d40cffbc8a9..92a43e965044c06c2fdcaca78d50779440b301ab 100644 (file)
@@ -73,7 +73,35 @@ int GroupMap::readDesignMap() {
                m->setAllGroups(namesOfGroups);
                return error;
 }
-
+/************************************************************/
+int GroupMap::readDesignMap(string filename) {
+    groupFileName = filename;
+       m->openInputFile(filename, fileHandle);
+       index = 0;
+    string seqName, seqGroup;
+    int error = 0;
+    
+    while(fileHandle){
+        fileHandle >> seqName; m->gobble(fileHandle);          //read from first column
+        fileHandle >> seqGroup;                        //read from second column
+        
+        if (m->control_pressed) {  fileHandle.close();  return 1; }
+        
+        setNamesOfGroups(seqGroup);
+        
+        it = groupmap.find(seqName);
+        
+        if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine();  }
+        else {
+            groupmap[seqName] = seqGroup;      //store data in map
+            seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+        }
+        m->gobble(fileHandle);
+    }
+    fileHandle.close();
+    m->setAllGroups(namesOfGroups);
+    return error;
+}
 /************************************************************/
 int GroupMap::getNumGroups() { return namesOfGroups.size();    }
 /************************************************************/
index 29c174134eee5689e5210775dbde2f220010d551..567165de820fd480ed2a4ecf8ff785bc9ec1c92c 100644 (file)
 
 class GroupMap {
 public:
-       GroupMap() {};
+       GroupMap() { m = MothurOut::getInstance(); }
        GroupMap(string);
        ~GroupMap();
        int readMap();
        int readDesignMap();
+    int readDesignMap(string);
        int getNumGroups();
        bool isValidGroup(string);  //return true if string is a valid group
        string getGroup(string);
index 97f480e749b7042a07a39d0f48890ff137d49dd0..8d1d7f7a6864b261a0a31611a1e02270730cfd32 100644 (file)
@@ -240,7 +240,6 @@ int IndicatorCommand::execute(){
                        util.setGroups(Groups, nameGroups);
                        designMap->setNamesOfGroups(nameGroups);
                        
-                       //loop through the Groups and fill Globaldata's Groups with the design file info
                        vector<string> namesSeqs = designMap->getNamesSeqs(Groups);
                        m->setGroups(namesSeqs);
                }
index 64bdbb4ceebd18f5abeff63b26fbb4d4ac3dc924..726ddd61b75c6185cca6b4ebac108b7dd1b4d44f 100644 (file)
 #include "rarefactsharedcommand.h"
 #include "sharedsobs.h"
 #include "sharednseqs.h"
+#include "sharedutilities.h"
 
 //**********************************************************************************************************************
 vector<string> RareFactSharedCommand::setParameters(){ 
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
+        CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
                CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
                CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+        CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -39,7 +42,9 @@ string RareFactSharedCommand::getHelpString(){
                string helpString = "";
                ValidCalculators validCalculator;
                helpString += "The collect.shared command parameters are shared, label, freq, calc and groups.  shared is required if there is no current sharedfile. \n";
-               helpString += "The rarefaction.shared command parameters are shared, label, iters, groups, jumble and calc.  shared is required if there is no current sharedfile. \n";
+               helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble and calc.  shared is required if there is no current sharedfile. \n";
+        helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
+        helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
                helpString += "The rarefaction command should be in the following format: \n";
                helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
                helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
@@ -114,6 +119,15 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("design");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["design"] = inputDir + it->second;           }
+                               }
+
                        }
                        
                        //get shared file
@@ -125,6 +139,11 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
                                if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else { m->setSharedFile(sharedfile); }
+            
+            designfile = validParameter.validFile(parameters, "design", true);
+                       if (designfile == "not open") { abort = true; designfile = ""; }
+                       else if (designfile == "not found") {   designfile = "";        }
+                       else { m->setDesignFile(designfile); }
                        
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -159,6 +178,12 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
                                m->splitAtDash(groups, Groups);
                        }
                        m->setGroups(Groups);
+            
+            string sets = validParameter.validFile(parameters, "sets", false);                 
+                       if (sets == "not found") { sets = ""; }
+                       else { 
+                               m->splitAtDash(sets, Sets);
+                       }
                        
                        string temp;
                        temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
@@ -186,11 +211,73 @@ int RareFactSharedCommand::execute(){
        try {
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-       
-               string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        
+        GroupMap designMap;
+        if (designfile == "") { //fake out designMap to run with process
+            process(designMap, "");
+        }else {
+            designMap.readDesignMap(designfile);
+            
+            //fill Sets - checks for "all" and for any typo groups
+                       SharedUtil util;
+                       vector<string> nameSets = designMap.getNamesOfGroups();
+                       util.setGroups(Sets, nameSets);
+            
+            for (int i = 0; i < Sets.size(); i++) {
+                process(designMap, Sets[i]);
+            }
+        }
+                    
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RareFactSharedCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
+       try {
+        Rarefact* rCurve;
+        vector<Display*> rDisplays;
+        
+        InputData input(sharedfile, "sharedfile");
+               lookup = input.getSharedRAbundVectors();
+        if (lookup.size() < 2) { 
+                       m->mothurOut("I cannot run the command without at least 2 valid groups."); 
+            for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+            return 0;
+        }
+        
+        string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        
+        vector<string> newGroups = m->getGroups();
+        if (thisSet != "") {  //make groups only filled with groups from this set so that's all inputdata will read
+            vector<string> thisSets; thisSets.push_back(thisSet);
+            newGroups = designMap.getNamesSeqs(thisSets);
+            fileNameRoot += thisSet + ".";
+        }
+        
+        vector<SharedRAbundVector*> subset;
+        if (thisSet == "") {  subset.clear(); subset = lookup;  }
+        else {//fill subset with this sets groups
+            subset.clear();
+            for (int i = 0; i < lookup.size(); i++) {
+                if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                    subset.push_back(lookup[i]);
+                }
+            }
+        }
                
                ValidCalculators validCalculator;
-                       
                for (int i=0; i<Estimators.size(); i++) {
                        if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
                                if (Estimators[i] == "sharedobserved") { 
@@ -204,82 +291,90 @@ int RareFactSharedCommand::execute(){
                }
                
                //if the users entered no valid calculators don't execute command
-               if (rDisplays.size() == 0) { return 0; }
-                       
-               input = new InputData(sharedfile, "sharedfile");
-               lookup = input->getSharedRAbundVectors();
-               string lastLabel = lookup[0]->getLabel();
+               if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
                
                if (m->control_pressed) { 
-                       m->clearGroups(); 
-                       delete input;
                        for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                        return 0;
                }
-
-
-               if (lookup.size() < 2) { 
-                       m->mothurOut("I cannot run the command without at least 2 valid groups."); 
-                       for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
-                       return 0;
-               }
-               
+        
+        
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        string lastLabel = subset[0]->getLabel();
                set<string> processedLabels;
                set<string> userLabels = labels;
-       
+        
                //as long as you are not at the end of the file or done wih the lines you want
-               while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+               while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        if (m->control_pressed) { 
-                               m->clearGroups(); 
-                               delete input;
                                for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                                for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                return 0;
                        }
                        
-                       if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
-                               m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               rCurve = new Rarefact(lookup, rDisplays);
+                       if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
+                               m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+                               rCurve = new Rarefact(subset, rDisplays);
                                rCurve->getSharedCurve(freq, nIters);
                                delete rCurve;
-                       
-                               processedLabels.insert(lookup[0]->getLabel());
-                               userLabels.erase(lookup[0]->getLabel());
+                
+                               processedLabels.insert(subset[0]->getLabel());
+                               userLabels.erase(subset[0]->getLabel());
                        }
                        
-                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-                                       string saveLabel = lookup[0]->getLabel();
-                       
-                                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
-                                       lookup = input->getSharedRAbundVectors(lastLabel);
+                       if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = subset[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                
+                if (thisSet == "") {  subset.clear(); subset = lookup;  }
+                else {//fill subset with this sets groups
+                    subset.clear();
+                    for (int i = 0; i < lookup.size(); i++) {
+                        if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                            subset.push_back(lookup[i]);
+                        }
+                    }
+                }
 
-                                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       rCurve = new Rarefact(lookup, rDisplays);
-                                       rCurve->getSharedCurve(freq, nIters);
-                                       delete rCurve;
-
-                                       processedLabels.insert(lookup[0]->getLabel());
-                                       userLabels.erase(lookup[0]->getLabel());
-                                       
-                                       //restore real lastlabel to save below
-                                       lookup[0]->setLabel(saveLabel);
+                m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+                rCurve = new Rarefact(subset, rDisplays);
+                rCurve->getSharedCurve(freq, nIters);
+                delete rCurve;
+                
+                processedLabels.insert(subset[0]->getLabel());
+                userLabels.erase(subset[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                subset[0]->setLabel(saveLabel);
                        }
-                               
+            
                        
-                       lastLabel = lookup[0]->getLabel();
+                       lastLabel = subset[0]->getLabel();
                        
                        //get next line to process
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
-                       lookup = input->getSharedRAbundVectors();
+                       lookup = input.getSharedRAbundVectors();
+            
+            if (lookup[0] != NULL) {
+                if (thisSet == "") {  subset.clear(); subset = lookup;  }
+                else {//fill subset with this sets groups
+                    subset.clear();
+                    for (int i = 0; i < lookup.size(); i++) {
+                        if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                            subset.push_back(lookup[i]);
+                        }
+                    }
+                }
+            }else {  subset.clear(); subset.push_back(NULL); }
+
                }
                
                if (m->control_pressed) { 
-                       m->clearGroups(); 
-                       delete input;
                        for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                        return 0;
@@ -299,8 +394,6 @@ int RareFactSharedCommand::execute(){
                }
                
                if (m->control_pressed) { 
-                       m->clearGroups(); 
-                       delete input; 
                        for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                        return 0;
@@ -309,33 +402,33 @@ int RareFactSharedCommand::execute(){
                //run last label if you need to
                if (needToRun == true)  {
                        for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i]; }  } 
-                       lookup = input->getSharedRAbundVectors(lastLabel);
-
-                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                       rCurve = new Rarefact(lookup, rDisplays);
+                       lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            if (thisSet == "") {  subset.clear(); subset = lookup;  }
+            else {//fill subset with this sets groups
+                subset.clear();
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                        subset.push_back(lookup[i]);
+                    }
+                }
+            }
+            
+                       m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+                       rCurve = new Rarefact(subset, rDisplays);
                        rCurve->getSharedCurve(freq, nIters);
                        delete rCurve;
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                }
                
                for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
-               m->clearGroups(); 
-               delete input;
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
 
-               return 0;
-       }
+        
+        return 0;
+    }
        catch(exception& e) {
-               m->errorOut(e, "RareFactSharedCommand", "execute");
+               m->errorOut(e, "RareFactSharedCommand", "process");
                exit(1);
        }
 }
-
-
 //**********************************************************************************************************************
index 2112e8d78565712a490de69bedd2ffb44844a768..1a2d944894feabe26b4103e5431c0da08c6574b4 100644 (file)
@@ -36,17 +36,16 @@ public:
 private:
        
        vector<SharedRAbundVector*> lookup;
-       InputData* input;
-       Rarefact* rCurve;
-       vector<Display*> rDisplays;
        int nIters;
        string format;
        float freq;
        
        bool abort, allLines, jumble;
        set<string> labels; //holds labels to be used
-       string label, calc, groups, outputDir, sharedfile;
-       vector<string>  Estimators, Groups, outputNames;
+       string label, calc, groups, outputDir, sharedfile, designfile;
+       vector<string>  Estimators, Groups, outputNames, Sets;
+    
+    int process(GroupMap&, string);
 
 };
 
index a09825e4b9a3cd11dbe77b3dd82e703481c650bc..5bcd7d843a18829793afcc9660af7b7667385c4b 100644 (file)
@@ -2046,18 +2046,24 @@ vector<string> ShhherCommand::parseFlowFiles(string filename){
             out << thisNumFLows << endl;
             files.push_back(outputFileName);
             
+            int numLinesWrote = 0;
             for (int i = 0; i < largeSize; i++) {
                 if (in.eof()) { break; }
-                string line = m->getline(in);
+                string line = m->getline(in); m->gobble(in);
                 out << line << endl;
+                numLinesWrote++;
             }
             out.close();
+            
+            if (numLinesWrote == 0) {  m->mothurRemove(outputFileName); files.pop_back();  }
             count++;
         }
         in.close();
         
         if (m->control_pressed) { for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }  files.clear(); }
         
+        m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n"); 
+        
         return files;
     }
        catch(exception& e) {
@@ -2263,8 +2269,8 @@ int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFil
                         m->mothurRemove(otuCountsFileName);
                         m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups"));
                         m->mothurRemove(groupFileName);
-                        m->mothurRemove(theseFlowFileNames[g]);
                     }
+                    m->mothurRemove(theseFlowFileNames[g]);
                 }
                        }
             
@@ -3319,7 +3325,9 @@ void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int num
                                                        //otuCountsFile << base;
                                                }
                                        }
-                                       otuCountsFile << newSeq.substr(4) << endl;
+                                       
+                    if (newSeq.length() >= 4) {  otuCountsFile << newSeq.substr(4) << endl;  }
+                    else {  otuCountsFile << "NNNN" << endl;  }
                                }
                                otuCountsFile << endl;
                        }