]> git.donarmstrong.com Git - mothur.git/commitdiff
working on chimera.uchime change for dereplicate=t bug. added shared file to get...
authorSarahsWork <sarahswork@imac.westcotts.net>
Tue, 5 Mar 2013 19:13:00 +0000 (14:13 -0500)
committerSarahsWork <sarahswork@imac.westcotts.net>
Tue, 5 Mar 2013 19:13:00 +0000 (14:13 -0500)
chimerauchimecommand.cpp
chimerauchimecommand.h
getsharedotucommand.cpp
getsharedotucommand.h
seqerrorcommand.cpp
trimflowscommand.cpp
trimflowscommand.h
trimseqscommand.cpp
trimseqscommand.h

index 9a25582ddad078665bbc3f7e098dc4cbcf047097..3ed61707385f4859a4b4558bd35124fac1b2b417 100644 (file)
@@ -65,7 +65,7 @@ string ChimeraUchimeCommand::getHelpString(){
                helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
-        helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
+        helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
@@ -110,7 +110,8 @@ string ChimeraUchimeCommand::getOutputPattern(string type) {
         
         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
-        else if (type == "alns") {  pattern = "[filename],uchime.alns"; } 
+        else if (type == "alns") {  pattern = "[filename],uchime.alns"; }
+        else if (type == "count") {  pattern = "[filename],uchime.pick.count_table"; } 
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -129,6 +130,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){
                outputTypes["chimera"] = tempOutNames;
                outputTypes["accnos"] = tempOutNames;
                outputTypes["alns"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
@@ -163,6 +165,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        outputTypes["chimera"] = tempOutNames;
                        outputTypes["accnos"] = tempOutNames;
                        outputTypes["alns"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -646,6 +649,7 @@ int ChimeraUchimeCommand::execute(){
                        string accnosFileName = getOutputFileName("accnos", variables);
                        string alnsFileName = getOutputFileName("alns", variables);
                        string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
+            string newCountFile = "";
                                
                        //you provided a groupfile
                        string groupFile = "";
@@ -654,6 +658,8 @@ int ChimeraUchimeCommand::execute(){
             else if (hasCount) {
                 CountTable ct;
                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
+                newCountFile = getOutputFileName("count", variables);
             }
                        
                        if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
@@ -719,19 +725,61 @@ int ChimeraUchimeCommand::execute(){
                                if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
                                int totalSeqs = 0;
                                
-                               if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
-                               else                            {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
+                               if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
+                    
+                    //read my own
+                    if (hasCount && !dups) {
+                        CountTable newCount; newCount.readTable(nameFile);
+                        
+                        if (!m->isBlank(newCountFile)) {
+                            ifstream in2;
+                            m->openInputFile(newCountFile, in2);
+                            
+                            string name, group;
+                            while (!in2.eof()) {
+                                in2 >> name >> group; m->gobble(in2);
+                                newCount.setAbund(name, group, 0);
+                            }
+                            in2.close();
+                        }
+                        m->mothurRemove(newCountFile);
+                        newCount.printTable(newCountFile);
+                    }
+
+                }else                          {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
 
                                if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                if (hasCount) { delete cparser; }
-                else { delete sparser; }
+               
                 
                 if (!dups) { 
                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
                                
                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");     m->mothurOutEndLine();
                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
-                               }
+                               }else {
+                    /*if (hasCount) {  //removed empty seqs
+                        ofstream out2;
+                        m->openOutputFile(accnosFileName, out2);
+                        
+                        CountTable c; c.readTable(newCountFile);
+                        vector<string> nseqs = c.getNamesOfSeqs();
+                        vector<string> ngroups = c.getNamesOfGroups();
+                        for (int l = 0; l < nseqs.size(); l++) {
+                            if (c.getNumSeqs(nseqs[l]) == 0) {
+                                c.remove(nseqs[l]);
+                                out2 << nseqs[l] << endl;
+                            }
+                        }
+                        for (int l = 0; l < ngroups.size(); l++) {
+                            if (c.getGroupCount(ngroups[l]) == 0) {  c.removeGroup(ngroups[l]); }
+                        }
+                        out2.close();
+                        c.printTable(newCountFile);
+                    }*/
+                }
+                
+                if (hasCount) { delete cparser; }
+                else { delete sparser; }
                 
                                if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
                                        
@@ -1128,20 +1176,23 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){
        }
 }
 //**********************************************************************************************************************
-int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
+int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
        try {
                
                int totalSeqs = 0;
                int numChimeras = 0;
-               
+        
+        ofstream outCountList;
+        if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
+        
                for (int i = start; i < end; i++) {
-                       int start = time(NULL);  if (m->control_pressed) {  return 0; }
+                       int start = time(NULL);  if (m->control_pressed) {  outCountList.close(); m->mothurRemove(countlist); return 0; }
             
                        int error;
             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
                        
-                       int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
+                       int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
                        totalSeqs += numSeqs;
                        
                        if (m->control_pressed) { return 0; }
@@ -1150,14 +1201,52 @@ int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, stri
                        if (!m->debug) {  m->mothurRemove(filename);  }
             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
                        
+            //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
+            //This table will zero out group counts for seqs determined to be chimeric by that group.
+            if (dups) {
+                if (!m->isBlank(accnos+groups[i])) {
+                    ifstream in;
+                    m->openInputFile(accnos+groups[i], in);
+                    string name;
+                    if (hasCount) {
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in);
+                            outCountList << name << '\t' << groups[i] << endl;
+                        }
+                        in.close();
+                    }else {
+                        map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
+                        map<string, string>::iterator itN;
+                        ofstream out;
+                        m->openOutputFile(accnos+groups[i]+".temp", out);
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in); 
+                            itN = thisnamemap.find(name);
+                            if (itN != thisnamemap.end()) {
+                                vector<string> tempNames; m->splitAtComma(itN->second, tempNames); 
+                                for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+                                
+                            }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
+                        }
+                        out.close();
+                        in.close();
+                        m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
+                    }
+                   
+                }
+            }
+            
                        //append files
                        m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
                        m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
                        if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
                        
                        m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
-               }       
-               return totalSeqs;
+               }
+
+        if (hasCount && dups) { outCountList.close(); }
+        
+        return totalSeqs;
                
        }
        catch(exception& e) {
@@ -1641,8 +1730,8 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
                        
-                       uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
-                       tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
+                       uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0,  i);
+                       tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
                        tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                        
                        pDataArray.push_back(tempUchime);
@@ -1694,12 +1783,15 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
 }
 /**************************************************************************************************/
 
-int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
+int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
        try {
                
                processIDS.clear();
                int process = 1;
                int num = 0;
+        
+        CountTable newCount;
+        if (hasCount && dups) { newCount.readTable(nameFile); }
                
                //sanity check
                if (groups.size() < processors) { processors = groups.size(); }
@@ -1724,7 +1816,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+                               num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
                                
                                //pass numSeqs to parent
                                ofstream out;
@@ -1742,22 +1834,22 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                }
                
                //do my part
-               num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
                        int temp = processIDS[i];
                        wait(&temp);
                }
-               
+        
                for (int i = 0; i < processIDS.size(); i++) {
                        ifstream in;
                        string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
                        in.close(); m->mothurRemove(tempFile);
-               }
-                               
+        }
+        
 #else
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
@@ -1773,8 +1865,8 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
                        
-                       uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
-                       tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
+                       uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end,  i);
+                       tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
                        tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                        
                        pDataArray.push_back(tempUchime);
@@ -1787,7 +1879,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                
                
                //using the main process as a worker saves time and memory
-               num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -1798,8 +1890,25 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
+        
+        
 #endif         
                
+        //read my own
+        if (hasCount && dups) {
+            if (!m->isBlank(accnos + ".byCount")) {
+                ifstream in2;
+                m->openInputFile(accnos + ".byCount", in2);
+                
+                string name, group;
+                while (!in2.eof()) {
+                    in2 >> name >> group; m->gobble(in2);
+                    newCount.setAbund(name, group, 0);
+                }
+                in2.close();
+            }
+            m->mothurRemove(accnos + ".byCount");
+        }
                                
                //append output files
                for(int i=0;i<processIDS.size();i++){
@@ -1813,7 +1922,26 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                                m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
                                m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
                        }
+            
+            if (hasCount && dups) {
+                if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
+                    ifstream in2;
+                    m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
+                    
+                    string name, group;
+                    while (!in2.eof()) {
+                        in2 >> name >> group; m->gobble(in2);
+                        newCount.setAbund(name, group, 0);
+                    }
+                    in2.close();
+                }
+                m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
+            }
+
                }
+        
+        //print new *.pick.count_table
+        if (hasCount && dups) { newCount.printTable(newCountFile); }
                
                return num;     
                
index 6d9d001a142aaa6357cc81614c5f50af23f2bf76..2237493801aa836dcb84e77e3957a480f30dcc2b 100644 (file)
@@ -63,8 +63,8 @@ private:
        int readFasta(string, map<string, string>&);
        int printFile(vector<seqPriorityNode>&, string);
        int deconvoluteResults(map<string, string>&, string, string, string);
-       int driverGroups(string, string, string, string, int, int, vector<string>);
-       int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
+       int driverGroups(string, string, string, string, string, int, int, vector<string>);
+       int createProcessesGroups(string, string, string, string, string, vector<string>, string, string, string);
     int prepFile(string filename, string);
 
 
@@ -80,17 +80,17 @@ struct uchimeData {
        string namefile; 
        string groupfile;
        string outputFName;
-       string accnos, alns, filename, templatefile, uchimeLocation;
+       string accnos, alns, filename, templatefile, uchimeLocation, countlist;
        MothurOut* m;
        int start;
        int end;
        int threadID, count, numChimeras;
        vector<string> groups;
-       bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
+       bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
        string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;
        
        uchimeData(){}
-       uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+       uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, string nc, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
                fastafile = f;
                namefile = n;
                groupfile = g;
@@ -107,8 +107,9 @@ struct uchimeData {
                count = 0;
                numChimeras = 0;
         uchimeLocation = uloc;
+        countlist = nc;
        }
-       void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
+       void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
                useAbskew = Abskew;
                chimealns = calns;
                useMinH = MinH;
@@ -128,6 +129,7 @@ struct uchimeData {
                ucl = uc;
                useQueryfract = Queryfract;
         hasCount = hc;
+        dups = dps;
        }
        
        void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
@@ -183,6 +185,10 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                
                int totalSeqs = 0;
                int numChimeras = 0;
+        
+        ofstream outCountList;
+        if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
+
                
                for (int i = pDataArray->start; i < pDataArray->end; i++) {
                        int start = time(NULL);  if (pDataArray->m->control_pressed) {  if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
@@ -448,9 +454,14 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                        
                        ofstream out;
                        pDataArray->m->openOutputFile(accnos, out);
+            
                        
                        int num = 0;
                        numChimeras = 0;
+            map<string, string> thisnamemap;
+            map<string, string>::iterator itN;
+            if (pDataArray->dups && !pDataArray->hasCount) { thisnamemap = parser->getNameMap(pDataArray->groups[i]); }
+                
                        while(!in.eof()) {
                                
                                if (pDataArray->m->control_pressed) { break; }
@@ -466,7 +477,22 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                                for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
                                pDataArray->m->gobble(in);
                                
-                               if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
+                               if (chimeraFlag == "Y") {
+                    if (pDataArray->dups) {
+                        if (!pDataArray->hasCount) { //output redundant names for each group
+                            itN = thisnamemap.find(name);
+                            if (itN != thisnamemap.end()) {
+                                vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
+                                for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+                            }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
+
+                        }else {
+                            out << name << endl;
+                            outCountList << name << '\t' << pDataArray->groups[i] << endl;
+                        }
+                    }else{  out << name << endl;  }
+                    numChimeras++;
+                }
                                num++;
                        }
                        in.close();
@@ -491,6 +517,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                        
                }       
                
+        if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
                pDataArray->count = totalSeqs;
                if (pDataArray->hasCount) { delete cparser; } { delete parser; }
                return totalSeqs;
index 5ae8da588ba4a6d36b6658527d8cd12e208c50c4..1074d323e9aaeae08b69a2d546fdb8cae544d330 100644 (file)
 //**********************************************************************************************************************
 vector<string> GetSharedOTUCommand::setParameters(){   
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false); parameters.push_back(pfasta);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(pgroup);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(plist);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "sharedFasta", "none", "none","fasta",false,false); parameters.push_back(pfasta);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "groupList","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "sharedList", "sharedList", "groupList","sharedseq",false,false,true); parameters.push_back(plist);
+        CommandParameter pshared("shared", "InputTypes", "", "", "sharedList-sharedFasta", "sharedList", "none","sharedseq",false,false,true); parameters.push_back(pshared);
                CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "","",false,false); parameters.push_back(poutput);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-               CommandParameter punique("unique", "String", "", "", "", "", "","",false,false,true); parameters.push_back(punique);
-               CommandParameter pshared("shared", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pshared);
+               CommandParameter puniquegroups("uniquegroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(puniquegroups);
+               CommandParameter psharedgroups("sharedgroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(psharedgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
 
@@ -36,18 +37,18 @@ vector<string> GetSharedOTUCommand::setParameters(){
 string GetSharedOTUCommand::getHelpString(){   
        try {
                string helpString = "";
-               helpString += "The get.sharedseqs command parameters are list, group, label, unique, shared, output and fasta.  The list and group parameters are required, unless you have valid current files.\n";
+               helpString += "The get.sharedseqs command parameters are list, group, shared, label, uniquegroups, sharedgroups, output and fasta.  The list and group or shared parameters are required, unless you have valid current files.\n";
                helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
-               helpString += "The unique and shared parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
-               helpString += "If you enter your groups under the unique parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
-               helpString += "If you enter your groups under the shared parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
-               helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group file.\n";
-               helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified.\n";
+               helpString += "The uniquegroups and sharedgroups parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
+               helpString += "If you enter your groups under the uniquegroups parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
+               helpString += "If you enter your groups under the sharedgroups parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
+               helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group or shared file.\n";
+               helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified. It can only be used with a list and group file not the shared file input.\n";
                helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
                helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
                helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
-               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
-               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
+               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, uniquegroups=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
+               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=amazon.groups, uniquegroups=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
                helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
                helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
                helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
@@ -145,6 +146,14 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["list"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
                                
                                it = parameters.find("group");
                                //user has given a template file
@@ -159,28 +168,53 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                        //check for required parameters
                        listfile = validParameter.validFile(parameters, "list", true);
                        if (listfile == "not open") { abort = true; }
-                       else if (listfile == "not found") { 
-                               listfile = m->getListFile(); 
-                               if (listfile != "") { format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
-                               else { 
-                                       m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); 
-                                       abort = true;
-                               }
-                       }else {  format = "list";       m->setListFile(listfile); }
+                       else if (listfile == "not found") { listfile = "";                      }
+            else {  format = "list";   m->setListFile(listfile); }
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { abort = true; }  
-                       else if (groupfile == "not found") { 
-                               groupfile = m->getGroupFile(); 
-                               if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
-                               else { 
-                                       m->mothurOut("No valid current group file. You must provide a group file."); m->mothurOutEndLine(); 
-                                       abort = true;
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
+            
+            sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { abort = true; }
+                       else if (sharedfile == "not found") { sharedfile = ""; }
+                       else { m->setSharedFile(sharedfile); }
+            
+            fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") {  fastafile = "";  }
+                       else { m->setFastaFile(fastafile); }
+
+            
+            if ((sharedfile == "") && (listfile == "")) { //look for currents
+                //is there are current file available for either of these?
+                               //give priority to shared, then list
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {
+                                       listfile = m->getListFile();
+                                       if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                                       else {
+                                               m->mothurOut("No valid current files. You must provide a shared or list file."); m->mothurOutEndLine();
+                                               abort = true;
+                                       }
                                }
-                       }else { m->setGroupFile(groupfile); }
-                                               
-                       if ((listfile == "") || (groupfile == "")) { m->mothurOut("The list and group parameters are required."); m->mothurOutEndLine(); abort = true; }
+            }else if ((sharedfile != "") && (listfile != "")) {
+                m->mothurOut("You may enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true;
+            }
                        
+            if (listfile != "") {
+                               if (groupfile == ""){
+                                       groupfile = m->getGroupFile();
+                                       if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                                       else { m->mothurOut("You need to provide a group file if you are going to use the list format."); m->mothurOutEndLine(); abort = true;
+                                       }
+                               }
+                       }
+
+                       if ((sharedfile != "") && (fastafile != "")) { m->mothurOut("You cannot use the fasta file with the shared file."); m->mothurOutEndLine(); abort = true; }
+            
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
                        label = validParameter.validFile(parameters, "label", false);                   
@@ -194,29 +228,22 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                        if (output == "not found") { output = ""; }
                        else if (output == "default") { output = ""; }
                        
-                       groups = validParameter.validFile(parameters, "unique", false);                 
+                       groups = validParameter.validFile(parameters, "uniquegroups", false);                   
                        if (groups == "not found") { groups = ""; }
                        else { 
                                userGroups = "unique." + groups;
                                m->splitAtDash(groups, Groups);
-                               m->setGroups(Groups);
-                               
                        }
                        
-                       groups = validParameter.validFile(parameters, "shared", false);                 
+                       groups = validParameter.validFile(parameters, "sharedgroups", false);                   
                        if (groups == "not found") { groups = "";  }
                        else { 
                                userGroups = groups;
                                m->splitAtDash(groups, Groups);
-                               m->setGroups(Groups);
                                unique = false;
                        }
                        
-                       fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
-                       else if (fastafile == "not found") {  fastafile = "";  }        
-                       else { m->setFastaFile(fastafile); }
-               }
+        }
 
        }
        catch(exception& e) {
@@ -231,125 +258,128 @@ int GetSharedOTUCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               groupMap = new GroupMap(groupfile);
-               int error = groupMap->readMap();
-               if (error == 1) { delete groupMap; return 0; }
-               
-               if (m->control_pressed) { delete groupMap; return 0; }
-               
-               if (Groups.size() == 0) {
-                       Groups = groupMap->getNamesOfGroups();
-                       
-                       //make string for outputfile name
-                       userGroups = "unique.";
-                       for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
-                       userGroups = userGroups.substr(0, userGroups.length()-1);
-               }else{
-                       //sanity check for group names
-                       SharedUtil util;
-                       vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
-                       util.setGroups(Groups, namesOfGroups);
-                       groupMap->setNamesOfGroups(namesOfGroups);
-               }
-       
-               //put groups in map to find easier
-               for(int i = 0; i < Groups.size(); i++) {
-                       groupFinder[Groups[i]] = Groups[i];
-               }
-               
-               if (fastafile != "") {
-                       ifstream inFasta;
-                       m->openInputFile(fastafile, inFasta);
-                       
-                       while(!inFasta.eof()) {
-                               if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
-                               
-                               Sequence seq(inFasta); m->gobble(inFasta);
-                               if (seq.getName() != "") {  seqs.push_back(seq);   }
-                       }
-                       inFasta.close();
-               }
-               
-               ListVector* lastlist = NULL;
-               string lastLabel = "";
-               
-               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
-               set<string> processedLabels;
-               set<string> userLabels = labels;
-               
-               ifstream in;
-               m->openInputFile(listfile, in);
-               
-               //as long as you are not at the end of the file or done wih the lines you want
-               while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
-                       
-                       if (m->control_pressed) { 
-                               if (lastlist != NULL) {         delete lastlist;        }
-                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }  outputTypes.clear();
-                               delete groupMap; return 0;
-                       }
-                       
-                       list = new ListVector(in);
-                       
-                       if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
-                               m->mothurOut(list->getLabel()); 
-                               process(list);
-                               
-                               processedLabels.insert(list->getLabel());
-                               userLabels.erase(list->getLabel());
-                       }
-                       
-                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-                                       string saveLabel = list->getLabel();
-                                       
-                                       m->mothurOut(lastlist->getLabel()); 
-                                       process(lastlist);
-                                       
-                                       processedLabels.insert(lastlist->getLabel());
-                                       userLabels.erase(lastlist->getLabel());
-                                       
-                                       //restore real lastlabel to save below
-                                       list->setLabel(saveLabel);
-                       }
+        if ( sharedfile != "") { runShared(); }
+        else {
+            m->setGroups(Groups);
+            groupMap = new GroupMap(groupfile);
+            int error = groupMap->readMap();
+            if (error == 1) { delete groupMap; return 0; }
+            
+            if (m->control_pressed) { delete groupMap; return 0; }
+            
+            if (Groups.size() == 0) {
+                Groups = groupMap->getNamesOfGroups();
+                
+                //make string for outputfile name
+                userGroups = "unique.";
+                for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
+                userGroups = userGroups.substr(0, userGroups.length()-1);
+            }else{
+                //sanity check for group names
+                SharedUtil util;
+                vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
+                util.setGroups(Groups, namesOfGroups);
+                groupMap->setNamesOfGroups(namesOfGroups);
+            }
+        
+            //put groups in map to find easier
+            for(int i = 0; i < Groups.size(); i++) {
+                groupFinder[Groups[i]] = Groups[i];
+            }
+            
+            if (fastafile != "") {
+                ifstream inFasta;
+                m->openInputFile(fastafile, inFasta);
+                
+                while(!inFasta.eof()) {
+                    if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
+                    
+                    Sequence seq(inFasta); m->gobble(inFasta);
+                    if (seq.getName() != "") {  seqs.push_back(seq);   }
+                }
+                inFasta.close();
+            }
+            
+            ListVector* lastlist = NULL;
+            string lastLabel = "";
+            
+            //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+            set<string> processedLabels;
+            set<string> userLabels = labels;
+            
+            ifstream in;
+            m->openInputFile(listfile, in);
+            
+            //as long as you are not at the end of the file or done wih the lines you want
+            while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
+                
+                if (m->control_pressed) { 
+                    if (lastlist != NULL) {            delete lastlist;        }
+                    for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); }  outputTypes.clear();
+                    delete groupMap; return 0;
+                }
+                
+                list = new ListVector(in);
+                
+                if(allLines == 1 || labels.count(list->getLabel()) == 1){                      
+                    m->mothurOut(list->getLabel()); 
+                    process(list);
+                    
+                    processedLabels.insert(list->getLabel());
+                    userLabels.erase(list->getLabel());
+                }
+                
+                if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                        string saveLabel = list->getLabel();
+                        
+                        m->mothurOut(lastlist->getLabel()); 
+                        process(lastlist);
+                        
+                        processedLabels.insert(lastlist->getLabel());
+                        userLabels.erase(lastlist->getLabel());
+                        
+                        //restore real lastlabel to save below
+                        list->setLabel(saveLabel);
+                }
 
-                       lastLabel = list->getLabel();
-                       
-                       if (lastlist != NULL) {         delete lastlist;        }
-                       lastlist = list;                        
-               }
-               
-               in.close();
-               
-               //output error messages about any remaining user labels
-               set<string>::iterator it;
-               bool needToRun = false;
-               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       m->mothurOut("Your file does not include the label " + *it); 
-                       if (processedLabels.count(lastLabel) != 1) {
-                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
-                               needToRun = true;
-                       }else {
-                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
-                       }
-               }
-               
-               //run last label if you need to
-               if (needToRun == true)  {
-                               m->mothurOut(lastlist->getLabel()); 
-                               process(lastlist);
-                                       
-                               processedLabels.insert(lastlist->getLabel());
-                               userLabels.erase(lastlist->getLabel());
-               }
-               
+                lastLabel = list->getLabel();
+                
+                if (lastlist != NULL) {                delete lastlist;        }
+                lastlist = list;                       
+            }
+            
+            in.close();
+            
+            //output error messages about any remaining user labels
+            set<string>::iterator it;
+            bool needToRun = false;
+            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                m->mothurOut("Your file does not include the label " + *it); 
+                if (processedLabels.count(lastLabel) != 1) {
+                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                    needToRun = true;
+                }else {
+                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                }
+            }
+            
+            //run last label if you need to
+            if (needToRun == true)  {
+                    m->mothurOut(lastlist->getLabel()); 
+                    process(lastlist);
+                        
+                    processedLabels.insert(lastlist->getLabel());
+                    userLabels.erase(lastlist->getLabel());
+            }
+            
 
-               //reset groups parameter
-               m->clearGroups();  
-               
-               if (lastlist != NULL) {         delete lastlist;        }
-               
-               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
-               
+            //reset groups parameter
+            m->clearGroups();  
+            
+            if (lastlist != NULL) {            delete lastlist;        }
+            
+            if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
+               }
                //set fasta file as new current fastafile
                string current = "";
                itTypes = outputTypes.find("fasta");
@@ -521,5 +551,206 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                exit(1);
        }
 }
+/***********************************************************/
+int GetSharedOTUCommand::runShared() {
+       try {
+        InputData input(sharedfile, "sharedfile");
+               vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
+        
+        if (Groups.size() == 0) {
+            Groups = m->getGroups();
+            
+            //make string for outputfile name
+            userGroups = "unique.";
+            for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
+            userGroups = userGroups.substr(0, userGroups.length()-1);
+        }else {
+            //sanity check for group names
+            SharedUtil util;
+            vector<string> allGroups = m->getAllGroups();
+            util.setGroups(Groups, allGroups);
+        }
+        
+        //put groups in map to find easier
+        for(int i = 0; i < Groups.size(); i++) {
+            groupFinder[Groups[i]] = Groups[i];
+        }
+
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+        
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       if (m->control_pressed) {
+                outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }
+                for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
+                       }
+            
+            
+                       if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                               m->mothurOut(lookup[0]->getLabel()); 
+                               process(lookup);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                
+                m->mothurOut(lookup[0]->getLabel()); 
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();
+            
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                       lookup = input.getSharedRAbundVectors();
+               }
+               
+               if (m->control_pressed) {
+            outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }
+            for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
+        }
+        
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {
+                       m->mothurOut("Your file does not include the label " + *it);
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {        delete lookup[i];       } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); 
+            process(lookup);
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+               }
+        
+               //reset groups parameter
+               m->clearGroups();  
+               
+               return 0;
+
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetSharedOTUCommand", "runShared");
+               exit(1);
+       }
+}
+/***********************************************************/
+int GetSharedOTUCommand::process(vector<SharedRAbundVector*>& lookup) {
+       try {
+               
+               string outputFileNames;
+               if (outputDir == "") { outputDir += m->hasPath(sharedfile); }
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        variables["[group]"] = userGroups;
+               if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); }
+               else { outputFileNames = getOutputFileName("accnos", variables); }
+        
+        ofstream outNames;
+               m->openOutputFile(outputFileNames, outNames);
+               
+               bool wroteSomething = false;
+               int num = 0;
+        
+               //go through each bin, find out if shared
+               for (int i = 0; i < lookup[0]->getNumBins(); i++) {
+                       if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
+                       
+                       bool uniqueOTU = true;
+                       map<string, int> atLeastOne;
+                       for (int f = 0; f < Groups.size(); f++) {  atLeastOne[Groups[f]] = 0;  }
+                       
+                       set<string> namesOfGroupsInThisBin;
+                       
+                       for(int j = 0; j < lookup.size(); j++) {
+                               string seqGroup = lookup[j]->getGroup();
+                string name = m->currentBinLabels[i];
+                               
+                if (lookup[j]->getAbundance(i) != 0) {
+                    if (output != "accnos") {
+                        namesOfGroupsInThisBin.insert(name + "|" + seqGroup + "|" + toString(lookup[j]->getAbundance(i)));
+                    }else {  namesOfGroupsInThisBin.insert(name);      }
+                    
+                    //is this seq in one of the groups we care about
+                    it = groupFinder.find(seqGroup);
+                    if (it == groupFinder.end()) {  uniqueOTU = false;  } //you have sequences from a group you don't want
+                    else {  atLeastOne[seqGroup]++;  }
+                               }
+                       }
+                       
+                       //make sure you have at least one seq from each group you want
+                       bool sharedByAll = true;
+                       map<string, int>::iterator it2;
+                       for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) {
+                               if (it2->second == 0) {  sharedByAll = false;   }
+                       }
+                       
+                       //if the user wants unique bins and this is unique then print
+                       //or this the user wants shared bins and this bin is shared then print
+                       if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) {
+                               
+                               wroteSomething = true;
+                               num++;
+                               
+                               //output list of names
+                               for (set<string>::iterator itNames = namesOfGroupsInThisBin.begin(); itNames != namesOfGroupsInThisBin.end(); itNames++) {
+                                       outNames << (*itNames) << endl;
+                               }
+                       }
+               }
+               outNames.close();
+               
+               if (!wroteSomething) {
+                       m->mothurRemove(outputFileNames);
+                       string outputString = "\t" + toString(num) + " - No otus shared by groups";
+                       
+                       string groupString = "";
+                       for (int h = 0; h < Groups.size(); h++) {
+                               groupString += "  " + Groups[h];
+                       }
+                       
+                       outputString += groupString + ".";
+                       m->mothurOut(outputString); m->mothurOutEndLine();
+               }else {
+                       m->mothurOut("\t" + toString(num)); m->mothurOutEndLine();
+                       outputNames.push_back(outputFileNames);
+                       if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); }
+                       else { outputTypes["accnos"].push_back(outputFileNames); }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetSharedOTUCommand", "process");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
index 583c2e001a00607cd97771e8fdfa42cbd8866c9f..f03b5ddcf6b5de00eba42210d39c74e46848d73c 100644 (file)
@@ -14,6 +14,8 @@
 #include "listvector.hpp"
 #include "sequence.hpp"
 #include "groupmap.h"
+#include "sharedrabundvector.h"
+#include "inputdata.h"
 
 //**********************************************************************************************************************
 class GetSharedOTUCommand : public Command {
@@ -44,7 +46,7 @@ class GetSharedOTUCommand : public Command {
                GroupMap* groupMap;
                
                set<string> labels;
-               string fastafile, label, groups, listfile, groupfile, output, userGroups, outputDir, format;
+               string fastafile, label, groups, listfile, groupfile, sharedfile, output, userGroups, outputDir, format;
                bool abort, allLines, unique;
                vector<string> Groups;
                map<string, string> groupFinder;
@@ -53,6 +55,8 @@ class GetSharedOTUCommand : public Command {
                vector<string> outputNames;
                
                int process(ListVector*);
+        int process(vector<SharedRAbundVector*>&);
+        int runShared();
                
 };
 //**********************************************************************************************************************
index 0a6eae93df72e8556554bef32c5a52074a4d0a54..641a3abce04bde1b8002e11af636fcd5e58bc6e2 100644 (file)
@@ -407,6 +407,13 @@ int SeqErrorCommand::execute(){
                m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");
                m->mothurOutEndLine();
                
+        //set fasta file as new current fastafile
+               string current = "";
+               itTypes = outputTypes.find("errorseq");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+               }
+        
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
index 61b19b8dbaa61546c0e984e444fa8ddda5b366f1..7ecdf74916aa6ea61150d0174b685e8987a624c9 100644 (file)
@@ -489,7 +489,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                 if (pos == string::npos) {             
                     flowData.printFlows(trimFlowFile);
                     
-                    if(fasta)  {       currSeq.printSequence(fastaFile);       }
+                    if(fasta)  {       currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile);   }
                     
                     if(allFiles){
                         ofstream output;
index 065e6b4532e071383c39c54e38047b01a2b45344..f5493eb7517db72c20048da57080a587ae675f7e 100644 (file)
@@ -224,7 +224,7 @@ static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){
                                
                                flowData.printFlows(trimFlowFile);
                                
-                               if(pDataArray->fasta)   {       currSeq.printSequence(fastaFile);       }
+                               if(pDataArray->fasta)   {       currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile);   }
                                
                                if(pDataArray->allFiles){
                                        ofstream output;
index 709202f7d690c17e8a6edb7f5a7812047aec9177..4b90cda9355112d746999e2a90f9752f5befdf38 100644 (file)
@@ -684,15 +684,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
             //create reoriented primer and barcode pairs
             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
             for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
-                cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
-                cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
-                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+                  oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
                 rpairedPrimers[it->first] = tempPair;
             }
             for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
-                cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
-                cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
-                oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
                 rpairedBarcodes[it->first] = tempPair;
             }
             rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
@@ -1561,6 +1557,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        }
                     
                     oligosPair newPrimer(oligo, roligo);
+                    
+                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
                                        
                                        //check for repeat barcodes
                     string tempPair = oligo+roligo;
@@ -1604,7 +1602,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                             if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
                         }
                         
-                        oligosPair newPair(oligo, reverseOligo(reverseBarcode));
+                        reverseBarcode = reverseOligo(reverseBarcode);
+                        oligosPair newPair(oligo, reverseBarcode);
                         
                         if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
                         
index 4d0a6112cb6e288600ae944e9340563f5973da47..882f716357515b008c06889f3f1559a4e4525cd5 100644 (file)
@@ -261,7 +261,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                
         TrimOligos* trimOligos = NULL;
         int numBarcodes = pDataArray->barcodes.size();
-        if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); }
+        if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
         else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
         
         TrimOligos* rtrimOligos = NULL;
@@ -283,7 +283,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
                                   
                        if (pDataArray->m->control_pressed) {
-                delete trimOligos;
+                delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; }
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
                                if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
                 if(pDataArray->qFileName != "")        {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
@@ -372,6 +372,10 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         currentSeqsDiffs = thisCurrentSeqsDiffs;
                         barcodeIndex = thisBarcodeIndex;
                         primerIndex = thisPrimerIndex;
+                        currSeq.reverseComplement();
+                        if(pDataArray->qFileName != ""){
+                            currQual.flipQScores();
+                        }
                     }
                 }
 
@@ -567,12 +571,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        }
                        
                        //report progress
-                       if((i) % 1000 == 0){    pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }
+                       if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                        
                }
                //report progress
                if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                
+        if (pDataArray->reorient) { delete rtrimOligos; }
                delete trimOligos;
                inFASTA.close();
                trimFASTAFile.close();