Merge remote-tracking branch 'mothur/master'

author Pat Schloss <pschloss@umich.edu>

Tue, 12 Mar 2013 12:22:50 +0000 (08:22 -0400)

committer Pat Schloss <pschloss@umich.edu>

Tue, 12 Mar 2013 12:22:50 +0000 (08:22 -0400)
author Pat Schloss <pschloss@umich.edu>
Tue, 12 Mar 2013 12:22:50 +0000 (08:22 -0400)
committer Pat Schloss <pschloss@umich.edu>
Tue, 12 Mar 2013 12:22:50 +0000 (08:22 -0400)
diff --git a/amovacommand.cpp b/amovacommand.cpp

index a78aafcbd92a344420c29598f6cac4456404a890..492a096f76cc1de12514731f66514e8d632c0c5f 100644 (file)
--- a/amovacommand.cpp
+++ b/amovacommand.cpp
@@ -307,7 +307,7 @@ double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > gro
                 for(int i=0;i<iters;i++){
                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
                         double ssWithinRand = calcSSWithin(randomizedGroup);
-                       if(ssWithinRand < ssWithinOrig){        counter++;      }
+                       if(ssWithinRand <= ssWithinOrig){       counter++;      }
                 }
                 
                 double pValue = (double)counter / (double) iters;
diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp

index 9a25582ddad078665bbc3f7e098dc4cbcf047097..cb155eb2e53c3b8ba47c4ed36aa495efaef4460a 100644 (file)
--- a/chimerauchimecommand.cpp
+++ b/chimerauchimecommand.cpp
@@ -65,7 +65,7 @@ string ChimeraUchimeCommand::getHelpString(){
                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
-        helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
+        helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
          helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
@@ -110,7 +110,8 @@ string ChimeraUchimeCommand::getOutputPattern(string type) {
          
          if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
          else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
-        else if (type == "alns") {  pattern = "[filename],uchime.alns"; } 
+        else if (type == "alns") {  pattern = "[filename],uchime.alns"; }
+        else if (type == "count") {  pattern = "[filename],uchime.pick.count_table"; } 
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -129,6 +130,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){
                 outputTypes["chimera"] = tempOutNames;
                 outputTypes["accnos"] = tempOutNames;
                 outputTypes["alns"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
@@ -163,6 +165,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                         outputTypes["chimera"] = tempOutNames;
                         outputTypes["accnos"] = tempOutNames;
                         outputTypes["alns"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                         
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -646,6 +649,7 @@ int ChimeraUchimeCommand::execute(){
                         string accnosFileName = getOutputFileName("accnos", variables);
                         string alnsFileName = getOutputFileName("alns", variables);
                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
+            string newCountFile = "";
                                 
                         //you provided a groupfile
                         string groupFile = "";
@@ -654,6 +658,8 @@ int ChimeraUchimeCommand::execute(){
              else if (hasCount) {
                  CountTable ct;
                  if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
+                newCountFile = getOutputFileName("count", variables);
              }
                         
                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
@@ -719,19 +725,60 @@ int ChimeraUchimeCommand::execute(){
                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
                                 int totalSeqs = 0;
                                 
-                               if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
-                               else                            {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
+                               if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
+                    
+                    if (hasCount && dups) {
+                        CountTable c; c.readTable(nameFile);
+                        if (!m->isBlank(newCountFile)) {
+                            ifstream in2;
+                            m->openInputFile(newCountFile, in2);
+                            
+                            string name, group;
+                            while (!in2.eof()) {
+                                in2 >> name >> group; m->gobble(in2);
+                                c.setAbund(name, group, 0);
+                            }
+                            in2.close();
+                        }
+                        m->mothurRemove(newCountFile);
+                        c.printTable(newCountFile);
+                    }
+
+                }else                          {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
  
                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                if (hasCount) { delete cparser; }
-                else { delete sparser; }
+               
                  
                  if (!dups) { 
                      int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
                                 
                      m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");     m->mothurOutEndLine();
                      m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
-                               }
+                               }else {
+                    
+                    if (hasCount) {
+                        set<string> doNotRemove;
+                        CountTable c; c.readTable(newCountFile);
+                        vector<string> namesInTable = c.getNamesOfSeqs();
+                        for (int i = 0; i < namesInTable.size(); i++) {
+                            int temp = c.getNumSeqs(namesInTable[i]);
+                            if (temp == 0) {  c.remove(namesInTable[i]);  }
+                            else { doNotRemove.insert((namesInTable[i])); }
+                        }
+                        //remove names we want to keep from accnos file.
+                        set<string> accnosNames = m->readAccnos(accnosFileName);
+                        ofstream out2;
+                        m->openOutputFile(accnosFileName, out2);
+                        for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+                            if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
+                        }
+                        out2.close();
+                        c.printTable(newCountFile);
+                    }
+                }
+                
+                if (hasCount) { delete cparser; }
+                else { delete sparser; }
                  
                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
                                         
@@ -1128,20 +1175,23 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){
         }
  }
  //**********************************************************************************************************************
-int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
+int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
         try {
                 
                 int totalSeqs = 0;
                 int numChimeras = 0;
-               
+        
+        ofstream outCountList;
+        if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
+        
                 for (int i = start; i < end; i++) {
-                       int start = time(NULL);  if (m->control_pressed) {  return 0; }
+                       int start = time(NULL);  if (m->control_pressed) {  outCountList.close(); m->mothurRemove(countlist); return 0; }
              
                         int error;
              if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
              else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
                         
-                       int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
+                       int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
                         totalSeqs += numSeqs;
                         
                         if (m->control_pressed) { return 0; }
@@ -1150,14 +1200,52 @@ int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, stri
                         if (!m->debug) {  m->mothurRemove(filename);  }
              else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
                         
+            //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
+            //This table will zero out group counts for seqs determined to be chimeric by that group.
+            if (dups) {
+                if (!m->isBlank(accnos+groups[i])) {
+                    ifstream in;
+                    m->openInputFile(accnos+groups[i], in);
+                    string name;
+                    if (hasCount) {
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in);
+                            outCountList << name << '\t' << groups[i] << endl;
+                        }
+                        in.close();
+                    }else {
+                        map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
+                        map<string, string>::iterator itN;
+                        ofstream out;
+                        m->openOutputFile(accnos+groups[i]+".temp", out);
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in); 
+                            itN = thisnamemap.find(name);
+                            if (itN != thisnamemap.end()) {
+                                vector<string> tempNames; m->splitAtComma(itN->second, tempNames); 
+                                for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+                                
+                            }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
+                        }
+                        out.close();
+                        in.close();
+                        m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
+                    }
+                   
+                }
+            }
+            
                         //append files
                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
                         
                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
-               }       
-               return totalSeqs;
+               }
+
+        if (hasCount && dups) { outCountList.close(); }
+        
+        return totalSeqs;
                 
         }
         catch(exception& e) {
@@ -1641,8 +1729,8 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                         // Allocate memory for thread data.
                         string extension = toString(i) + ".temp";
                         
-                       uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
-                       tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
+                       uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0,  i);
+                       tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                         
                         pDataArray.push_back(tempUchime);
@@ -1694,12 +1782,15 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
  }
  /**************************************************************************************************/
  
-int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
+int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
         try {
                 
                 processIDS.clear();
                 int process = 1;
                 int num = 0;
+        
+        CountTable newCount;
+        if (hasCount && dups) { newCount.readTable(nameFile); }
                 
                 //sanity check
                 if (groups.size() < processors) { processors = groups.size(); }
@@ -1724,7 +1815,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                 process++;
                         }else if (pid == 0){
-                               num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+                               num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
                                 
                                 //pass numSeqs to parent
                                 ofstream out;
@@ -1742,22 +1833,22 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                 }
                 
                 //do my part
-               num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<processIDS.size();i++) { 
                         int temp = processIDS[i];
                         wait(&temp);
                 }
-               
+        
                 for (int i = 0; i < processIDS.size(); i++) {
                         ifstream in;
                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
                         m->openInputFile(tempFile, in);
                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
                         in.close(); m->mothurRemove(tempFile);
-               }
-                               
+        }
+        
  #else
                 //////////////////////////////////////////////////////////////////////////////////////////////////////
                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
@@ -1773,8 +1864,8 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                         // Allocate memory for thread data.
                         string extension = toString(i) + ".temp";
                         
-                       uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
-                       tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
+                       uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end,  i);
+                       tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                         
                         pDataArray.push_back(tempUchime);
@@ -1787,7 +1878,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                 
                 
                 //using the main process as a worker saves time and memory
-               num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                 
                 //Wait until all threads have terminated.
                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -1798,9 +1889,26 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
+        
+        
  #endif         
-               
-                               
+      
+        //read my own
+        if (hasCount && dups) {
+            if (!m->isBlank(accnos + ".byCount")) {
+                ifstream in2;
+                m->openInputFile(accnos + ".byCount", in2);
+                
+                string name, group;
+                while (!in2.eof()) {
+                    in2 >> name >> group; m->gobble(in2);
+                    newCount.setAbund(name, group, 0);
+                }
+                in2.close();
+            }
+            m->mothurRemove(accnos + ".byCount");
+        }
+       
                 //append output files
                 for(int i=0;i<processIDS.size();i++){
                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
@@ -1813,8 +1921,27 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
                         }
+            
+            if (hasCount && dups) {
+                if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
+                    ifstream in2;
+                    m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
+                    
+                    string name, group;
+                    while (!in2.eof()) {
+                        in2 >> name >> group; m->gobble(in2);
+                        newCount.setAbund(name, group, 0);
+                    }
+                    in2.close();
+                }
+                m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
+            }
+
                 }
-               
+        
+        //print new *.pick.count_table
+        if (hasCount && dups) {  newCount.printTable(newCountFile);   }
+               
                 return num;     
                 
         }
diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h

index 6d9d001a142aaa6357cc81614c5f50af23f2bf76..2237493801aa836dcb84e77e3957a480f30dcc2b 100644 (file)
--- a/chimerauchimecommand.h
+++ b/chimerauchimecommand.h
@@ -63,8 +63,8 @@ private:
         int readFasta(string, map<string, string>&);
         int printFile(vector<seqPriorityNode>&, string);
         int deconvoluteResults(map<string, string>&, string, string, string);
-       int driverGroups(string, string, string, string, int, int, vector<string>);
-       int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
+       int driverGroups(string, string, string, string, string, int, int, vector<string>);
+       int createProcessesGroups(string, string, string, string, string, vector<string>, string, string, string);
      int prepFile(string filename, string);
  
  
@@ -80,17 +80,17 @@ struct uchimeData {
         string namefile; 
         string groupfile;
         string outputFName;
-       string accnos, alns, filename, templatefile, uchimeLocation;
+       string accnos, alns, filename, templatefile, uchimeLocation, countlist;
         MothurOut* m;
         int start;
         int end;
         int threadID, count, numChimeras;
         vector<string> groups;
-       bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
+       bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
         string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;
         
         uchimeData(){}
-       uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+       uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, string nc, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
                 fastafile = f;
                 namefile = n;
                 groupfile = g;
@@ -107,8 +107,9 @@ struct uchimeData {
                 count = 0;
                 numChimeras = 0;
          uchimeLocation = uloc;
+        countlist = nc;
         }
-       void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
+       void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
                 useAbskew = Abskew;
                 chimealns = calns;
                 useMinH = MinH;
@@ -128,6 +129,7 @@ struct uchimeData {
                 ucl = uc;
                 useQueryfract = Queryfract;
          hasCount = hc;
+        dups = dps;
         }
         
         void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
@@ -183,6 +185,10 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                 
                 int totalSeqs = 0;
                 int numChimeras = 0;
+        
+        ofstream outCountList;
+        if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
+
                 
                 for (int i = pDataArray->start; i < pDataArray->end; i++) {
                         int start = time(NULL);  if (pDataArray->m->control_pressed) {  if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
@@ -448,9 +454,14 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                         
                         ofstream out;
                         pDataArray->m->openOutputFile(accnos, out);
+            
                         
                         int num = 0;
                         numChimeras = 0;
+            map<string, string> thisnamemap;
+            map<string, string>::iterator itN;
+            if (pDataArray->dups && !pDataArray->hasCount) { thisnamemap = parser->getNameMap(pDataArray->groups[i]); }
+                
                         while(!in.eof()) {
                                 
                                 if (pDataArray->m->control_pressed) { break; }
@@ -466,7 +477,22 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                                 for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
                                 pDataArray->m->gobble(in);
                                 
-                               if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
+                               if (chimeraFlag == "Y") {
+                    if (pDataArray->dups) {
+                        if (!pDataArray->hasCount) { //output redundant names for each group
+                            itN = thisnamemap.find(name);
+                            if (itN != thisnamemap.end()) {
+                                vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
+                                for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+                            }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
+
+                        }else {
+                            out << name << endl;
+                            outCountList << name << '\t' << pDataArray->groups[i] << endl;
+                        }
+                    }else{  out << name << endl;  }
+                    numChimeras++;
+                }
                                 num++;
                         }
                         in.close();
@@ -491,6 +517,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                         
                 }       
                 
+        if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
                 pDataArray->count = totalSeqs;
                 if (pDataArray->hasCount) { delete cparser; } { delete parser; }
                 return totalSeqs;
diff --git a/counttable.cpp b/counttable.cpp

index ad0b2dadfcba7eb2f866b44d3d9b3011242b096a..c16bf9227a51580cac00de78a215b89e2a93e001 100644 (file)
--- a/counttable.cpp
+++ b/counttable.cpp
@@ -283,7 +283,23 @@ int CountTable::printTable(string file) {
          for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
          out << endl;
          
-        for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
+        map<int, string> reverse; //use this to preserve order
+        for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
+        
+        for (int i = 0; i < totals.size(); i++) {
+            map<int, string>::iterator itR = reverse.find(i);
+            
+            if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+                out << itR->second << '\t' << totals[i] << '\t';
+                if (hasGroups) {
+                    for (int j = 0; j < groups.size(); j++) {
+                        out << counts[i][j] << '\t';
+                    }
+                }
+                out << endl;
+            }
+        }
+        /*for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
              out << itNames->first << '\t' << totals[itNames->second] << '\t';
              if (hasGroups) {
                  
@@ -292,7 +308,7 @@ int CountTable::printTable(string file) {
                  }
              }
              out << endl;
-        }
+        }*/
          out.close();
          return 0;
      }
@@ -364,7 +380,7 @@ int CountTable::getGroupCount(string groupName) {
          if (hasGroups) {
              map<string, int>::iterator it = indexGroupMap.find(groupName);
              if (it == indexGroupMap.end()) {
-                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
              }else { 
                  return totalGroups[it->second];
              }
@@ -384,11 +400,11 @@ int CountTable::getGroupCount(string seqName, string groupName) {
          if (hasGroups) {
              map<string, int>::iterator it = indexGroupMap.find(groupName);
              if (it == indexGroupMap.end()) {
-                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
              }else { 
                  map<string, int>::iterator it2 = indexNameMap.find(seqName);
                  if (it2 == indexNameMap.end()) {
-                    m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                    m->mothurOut("[ERROR]: seq " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
                  }else { 
                      return counts[it2->second][it->second];
                  }
diff --git a/flowdata.cpp b/flowdata.cpp

index f4605b6dbe1613fdb3e1a6beb1929a3585ff5fe7..b2e856c28e5c939a00102e19d34a97799c11ae08 100644 (file)
--- a/flowdata.cpp
+++ b/flowdata.cpp
@@ -124,18 +124,40 @@ void FlowData::updateEndFlow(){
  }
  
  //**********************************************************************************************************************
-
+//TATGCT
+//1 0 0 0 0 1
+//then the second positive flow is for a T, but you saw a T between the last and previous flow adn it wasn't positive, so something is missing
+//Becomes TNT
  void FlowData::translateFlow(){
-       
         try{
-               sequence = "";
-               for(int i=0;i<endFlow;i++){
+        sequence = "";
+        set<char> charInMiddle;
+        int oldspot = -1;
+        bool updateOld = false;
+        
+        for(int i=0;i<endFlow;i++){
                         int intensity = (int)(flowData[i] + 0.5);
                         char base = baseFlow[i % baseFlow.length()];
+            
+            if (intensity == 0) { //are we in the middle
+                if (oldspot != -1) { charInMiddle.insert(base); }
+            }else if (intensity >= 1) {
+                if (oldspot == -1) { updateOld = true;  }
+                else {  //check for bases inbetween two 1's
+                    if (charInMiddle.count(base) != 0) { //we want to covert to an N
+                        sequence = sequence.substr(0, oldspot+1);
+                        sequence += 'N';
+                    }
+                    updateOld = true;
+                    charInMiddle.clear();
+                }
+            }
                         
                         for(int j=0;j<intensity;j++){
                                 sequence += base;
                         }
+            
+            if (updateOld) { oldspot = sequence.length()-1;  updateOld = false; }
                 }
          
                 if(sequence.size() > 4){
@@ -144,7 +166,7 @@ void FlowData::translateFlow(){
                 else{
                         sequence = "NNNN";
                 }
-       }
+    }
         catch(exception& e) {
                 m->errorOut(e, "FlowData", "translateFlow");
                 exit(1);
diff --git a/getsharedotucommand.cpp b/getsharedotucommand.cpp

index 5ae8da588ba4a6d36b6658527d8cd12e208c50c4..1074d323e9aaeae08b69a2d546fdb8cae544d330 100644 (file)
--- a/getsharedotucommand.cpp
+++ b/getsharedotucommand.cpp
@@ -13,13 +13,14 @@
  //**********************************************************************************************************************
  vector<string> GetSharedOTUCommand::setParameters(){   
         try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false); parameters.push_back(pfasta);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(pgroup);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(plist);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "sharedFasta", "none", "none","fasta",false,false); parameters.push_back(pfasta);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "groupList","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "sharedList", "sharedList", "groupList","sharedseq",false,false,true); parameters.push_back(plist);
+        CommandParameter pshared("shared", "InputTypes", "", "", "sharedList-sharedFasta", "sharedList", "none","sharedseq",false,false,true); parameters.push_back(pshared);
                 CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "","",false,false); parameters.push_back(poutput);
                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-               CommandParameter punique("unique", "String", "", "", "", "", "","",false,false,true); parameters.push_back(punique);
-               CommandParameter pshared("shared", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pshared);
+               CommandParameter puniquegroups("uniquegroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(puniquegroups);
+               CommandParameter psharedgroups("sharedgroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(psharedgroups);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
  
@@ -36,18 +37,18 @@ vector<string> GetSharedOTUCommand::setParameters(){
  string GetSharedOTUCommand::getHelpString(){   
         try {
                 string helpString = "";
-               helpString += "The get.sharedseqs command parameters are list, group, label, unique, shared, output and fasta.  The list and group parameters are required, unless you have valid current files.\n";
+               helpString += "The get.sharedseqs command parameters are list, group, shared, label, uniquegroups, sharedgroups, output and fasta.  The list and group or shared parameters are required, unless you have valid current files.\n";
                 helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
-               helpString += "The unique and shared parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
-               helpString += "If you enter your groups under the unique parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
-               helpString += "If you enter your groups under the shared parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
-               helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group file.\n";
-               helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified.\n";
+               helpString += "The uniquegroups and sharedgroups parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
+               helpString += "If you enter your groups under the uniquegroups parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
+               helpString += "If you enter your groups under the sharedgroups parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
+               helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group or shared file.\n";
+               helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified. It can only be used with a list and group file not the shared file input.\n";
                 helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
                 helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
                 helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
-               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
-               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
+               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, uniquegroups=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
+               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=amazon.groups, uniquegroups=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
                 helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
                 helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
@@ -145,6 +146,14 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
                                 
                                 it = parameters.find("group");
                                 //user has given a template file
@@ -159,28 +168,53 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                         //check for required parameters
                         listfile = validParameter.validFile(parameters, "list", true);
                         if (listfile == "not open") { abort = true; }
-                       else if (listfile == "not found") { 
-                               listfile = m->getListFile(); 
-                               if (listfile != "") { format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
-                               else { 
-                                       m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); 
-                                       abort = true;
-                               }
-                       }else {  format = "list";       m->setListFile(listfile); }
+                       else if (listfile == "not found") { listfile = "";                      }
+            else {  format = "list";   m->setListFile(listfile); }
                         
                         groupfile = validParameter.validFile(parameters, "group", true);
                         if (groupfile == "not open") { abort = true; }  
-                       else if (groupfile == "not found") { 
-                               groupfile = m->getGroupFile(); 
-                               if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
-                               else { 
-                                       m->mothurOut("No valid current group file. You must provide a group file."); m->mothurOutEndLine(); 
-                                       abort = true;
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
+            
+            sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { abort = true; }
+                       else if (sharedfile == "not found") { sharedfile = ""; }
+                       else { m->setSharedFile(sharedfile); }
+            
+            fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") {  fastafile = "";  }
+                       else { m->setFastaFile(fastafile); }
+
+            
+            if ((sharedfile == "") && (listfile == "")) { //look for currents
+                //is there are current file available for either of these?
+                               //give priority to shared, then list
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {
+                                       listfile = m->getListFile();
+                                       if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                                       else {
+                                               m->mothurOut("No valid current files. You must provide a shared or list file."); m->mothurOutEndLine();
+                                               abort = true;
+                                       }
                                 }
-                       }else { m->setGroupFile(groupfile); }
-                                               
-                       if ((listfile == "") || (groupfile == "")) { m->mothurOut("The list and group parameters are required."); m->mothurOutEndLine(); abort = true; }
+            }else if ((sharedfile != "") && (listfile != "")) {
+                m->mothurOut("You may enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true;
+            }
                         
+            if (listfile != "") {
+                               if (groupfile == ""){
+                                       groupfile = m->getGroupFile();
+                                       if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                                       else { m->mothurOut("You need to provide a group file if you are going to use the list format."); m->mothurOutEndLine(); abort = true;
+                                       }
+                               }
+                       }
+
+                       if ((sharedfile != "") && (fastafile != "")) { m->mothurOut("You cannot use the fasta file with the shared file."); m->mothurOutEndLine(); abort = true; }
+            
                         //check for optional parameter and set defaults
                         // ...at some point should added some additional type checking...
                         label = validParameter.validFile(parameters, "label", false);                   
@@ -194,29 +228,22 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                         if (output == "not found") { output = ""; }
                         else if (output == "default") { output = ""; }
                         
-                       groups = validParameter.validFile(parameters, "unique", false);                 
+                       groups = validParameter.validFile(parameters, "uniquegroups", false);                   
                         if (groups == "not found") { groups = ""; }
                         else { 
                                 userGroups = "unique." + groups;
                                 m->splitAtDash(groups, Groups);
-                               m->setGroups(Groups);
-                               
                         }
                         
-                       groups = validParameter.validFile(parameters, "shared", false);                 
+                       groups = validParameter.validFile(parameters, "sharedgroups", false);                   
                         if (groups == "not found") { groups = "";  }
                         else { 
                                 userGroups = groups;
                                 m->splitAtDash(groups, Groups);
-                               m->setGroups(Groups);
                                 unique = false;
                         }
                         
-                       fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
-                       else if (fastafile == "not found") {  fastafile = "";  }        
-                       else { m->setFastaFile(fastafile); }
-               }
+        }
  
         }
         catch(exception& e) {
@@ -231,125 +258,128 @@ int GetSharedOTUCommand::execute(){
                 
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                 
-               groupMap = new GroupMap(groupfile);
-               int error = groupMap->readMap();
-               if (error == 1) { delete groupMap; return 0; }
-               
-               if (m->control_pressed) { delete groupMap; return 0; }
-               
-               if (Groups.size() == 0) {
-                       Groups = groupMap->getNamesOfGroups();
-                       
-                       //make string for outputfile name
-                       userGroups = "unique.";
-                       for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
-                       userGroups = userGroups.substr(0, userGroups.length()-1);
-               }else{
-                       //sanity check for group names
-                       SharedUtil util;
-                       vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
-                       util.setGroups(Groups, namesOfGroups);
-                       groupMap->setNamesOfGroups(namesOfGroups);
-               }
-       
-               //put groups in map to find easier
-               for(int i = 0; i < Groups.size(); i++) {
-                       groupFinder[Groups[i]] = Groups[i];
-               }
-               
-               if (fastafile != "") {
-                       ifstream inFasta;
-                       m->openInputFile(fastafile, inFasta);
-                       
-                       while(!inFasta.eof()) {
-                               if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
-                               
-                               Sequence seq(inFasta); m->gobble(inFasta);
-                               if (seq.getName() != "") {  seqs.push_back(seq);   }
-                       }
-                       inFasta.close();
-               }
-               
-               ListVector* lastlist = NULL;
-               string lastLabel = "";
-               
-               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
-               set<string> processedLabels;
-               set<string> userLabels = labels;
-               
-               ifstream in;
-               m->openInputFile(listfile, in);
-               
-               //as long as you are not at the end of the file or done wih the lines you want
-               while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
-                       
-                       if (m->control_pressed) { 
-                               if (lastlist != NULL) {         delete lastlist;        }
-                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }  outputTypes.clear();
-                               delete groupMap; return 0;
-                       }
-                       
-                       list = new ListVector(in);
-                       
-                       if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
-                               m->mothurOut(list->getLabel()); 
-                               process(list);
-                               
-                               processedLabels.insert(list->getLabel());
-                               userLabels.erase(list->getLabel());
-                       }
-                       
-                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-                                       string saveLabel = list->getLabel();
-                                       
-                                       m->mothurOut(lastlist->getLabel()); 
-                                       process(lastlist);
-                                       
-                                       processedLabels.insert(lastlist->getLabel());
-                                       userLabels.erase(lastlist->getLabel());
-                                       
-                                       //restore real lastlabel to save below
-                                       list->setLabel(saveLabel);
-                       }
+        if ( sharedfile != "") { runShared(); }
+        else {
+            m->setGroups(Groups);
+            groupMap = new GroupMap(groupfile);
+            int error = groupMap->readMap();
+            if (error == 1) { delete groupMap; return 0; }
+            
+            if (m->control_pressed) { delete groupMap; return 0; }
+            
+            if (Groups.size() == 0) {
+                Groups = groupMap->getNamesOfGroups();
+                
+                //make string for outputfile name
+                userGroups = "unique.";
+                for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
+                userGroups = userGroups.substr(0, userGroups.length()-1);
+            }else{
+                //sanity check for group names
+                SharedUtil util;
+                vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
+                util.setGroups(Groups, namesOfGroups);
+                groupMap->setNamesOfGroups(namesOfGroups);
+            }
+        
+            //put groups in map to find easier
+            for(int i = 0; i < Groups.size(); i++) {
+                groupFinder[Groups[i]] = Groups[i];
+            }
+            
+            if (fastafile != "") {
+                ifstream inFasta;
+                m->openInputFile(fastafile, inFasta);
+                
+                while(!inFasta.eof()) {
+                    if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
+                    
+                    Sequence seq(inFasta); m->gobble(inFasta);
+                    if (seq.getName() != "") {  seqs.push_back(seq);   }
+                }
+                inFasta.close();
+            }
+            
+            ListVector* lastlist = NULL;
+            string lastLabel = "";
+            
+            //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+            set<string> processedLabels;
+            set<string> userLabels = labels;
+            
+            ifstream in;
+            m->openInputFile(listfile, in);
+            
+            //as long as you are not at the end of the file or done wih the lines you want
+            while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
+                
+                if (m->control_pressed) { 
+                    if (lastlist != NULL) {            delete lastlist;        }
+                    for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); }  outputTypes.clear();
+                    delete groupMap; return 0;
+                }
+                
+                list = new ListVector(in);
+                
+                if(allLines == 1 || labels.count(list->getLabel()) == 1){                      
+                    m->mothurOut(list->getLabel()); 
+                    process(list);
+                    
+                    processedLabels.insert(list->getLabel());
+                    userLabels.erase(list->getLabel());
+                }
+                
+                if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                        string saveLabel = list->getLabel();
+                        
+                        m->mothurOut(lastlist->getLabel()); 
+                        process(lastlist);
+                        
+                        processedLabels.insert(lastlist->getLabel());
+                        userLabels.erase(lastlist->getLabel());
+                        
+                        //restore real lastlabel to save below
+                        list->setLabel(saveLabel);
+                }
  
-                       lastLabel = list->getLabel();
-                       
-                       if (lastlist != NULL) {         delete lastlist;        }
-                       lastlist = list;                        
-               }
-               
-               in.close();
-               
-               //output error messages about any remaining user labels
-               set<string>::iterator it;
-               bool needToRun = false;
-               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       m->mothurOut("Your file does not include the label " + *it); 
-                       if (processedLabels.count(lastLabel) != 1) {
-                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
-                               needToRun = true;
-                       }else {
-                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
-                       }
-               }
-               
-               //run last label if you need to
-               if (needToRun == true)  {
-                               m->mothurOut(lastlist->getLabel()); 
-                               process(lastlist);
-                                       
-                               processedLabels.insert(lastlist->getLabel());
-                               userLabels.erase(lastlist->getLabel());
-               }
-               
+                lastLabel = list->getLabel();
+                
+                if (lastlist != NULL) {                delete lastlist;        }
+                lastlist = list;                       
+            }
+            
+            in.close();
+            
+            //output error messages about any remaining user labels
+            set<string>::iterator it;
+            bool needToRun = false;
+            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                m->mothurOut("Your file does not include the label " + *it); 
+                if (processedLabels.count(lastLabel) != 1) {
+                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                    needToRun = true;
+                }else {
+                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                }
+            }
+            
+            //run last label if you need to
+            if (needToRun == true)  {
+                    m->mothurOut(lastlist->getLabel()); 
+                    process(lastlist);
+                        
+                    processedLabels.insert(lastlist->getLabel());
+                    userLabels.erase(lastlist->getLabel());
+            }
+            
  
-               //reset groups parameter
-               m->clearGroups();  
-               
-               if (lastlist != NULL) {         delete lastlist;        }
-               
-               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
-               
+            //reset groups parameter
+            m->clearGroups();  
+            
+            if (lastlist != NULL) {            delete lastlist;        }
+            
+            if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
+               }
                 //set fasta file as new current fastafile
                 string current = "";
                 itTypes = outputTypes.find("fasta");
@@ -521,5 +551,206 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                 exit(1);
         }
  }
+/***********************************************************/
+int GetSharedOTUCommand::runShared() {
+       try {
+        InputData input(sharedfile, "sharedfile");
+               vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
+        
+        if (Groups.size() == 0) {
+            Groups = m->getGroups();
+            
+            //make string for outputfile name
+            userGroups = "unique.";
+            for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
+            userGroups = userGroups.substr(0, userGroups.length()-1);
+        }else {
+            //sanity check for group names
+            SharedUtil util;
+            vector<string> allGroups = m->getAllGroups();
+            util.setGroups(Groups, allGroups);
+        }
+        
+        //put groups in map to find easier
+        for(int i = 0; i < Groups.size(); i++) {
+            groupFinder[Groups[i]] = Groups[i];
+        }
+
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+        
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       if (m->control_pressed) {
+                outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }
+                for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
+                       }
+            
+            
+                       if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                               m->mothurOut(lookup[0]->getLabel()); 
+                               process(lookup);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                
+                m->mothurOut(lookup[0]->getLabel()); 
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();
+            
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                       lookup = input.getSharedRAbundVectors();
+               }
+               
+               if (m->control_pressed) {
+            outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }
+            for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
+        }
+        
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {
+                       m->mothurOut("Your file does not include the label " + *it);
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {        delete lookup[i];       } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); 
+            process(lookup);
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+               }
+        
+               //reset groups parameter
+               m->clearGroups();  
+               
+               return 0;
+
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetSharedOTUCommand", "runShared");
+               exit(1);
+       }
+}
+/***********************************************************/
+int GetSharedOTUCommand::process(vector<SharedRAbundVector*>& lookup) {
+       try {
+               
+               string outputFileNames;
+               if (outputDir == "") { outputDir += m->hasPath(sharedfile); }
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        variables["[group]"] = userGroups;
+               if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); }
+               else { outputFileNames = getOutputFileName("accnos", variables); }
+        
+        ofstream outNames;
+               m->openOutputFile(outputFileNames, outNames);
+               
+               bool wroteSomething = false;
+               int num = 0;
+        
+               //go through each bin, find out if shared
+               for (int i = 0; i < lookup[0]->getNumBins(); i++) {
+                       if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
+                       
+                       bool uniqueOTU = true;
+                       map<string, int> atLeastOne;
+                       for (int f = 0; f < Groups.size(); f++) {  atLeastOne[Groups[f]] = 0;  }
+                       
+                       set<string> namesOfGroupsInThisBin;
+                       
+                       for(int j = 0; j < lookup.size(); j++) {
+                               string seqGroup = lookup[j]->getGroup();
+                string name = m->currentBinLabels[i];
+                               
+                if (lookup[j]->getAbundance(i) != 0) {
+                    if (output != "accnos") {
+                        namesOfGroupsInThisBin.insert(name + "|" + seqGroup + "|" + toString(lookup[j]->getAbundance(i)));
+                    }else {  namesOfGroupsInThisBin.insert(name);      }
+                    
+                    //is this seq in one of the groups we care about
+                    it = groupFinder.find(seqGroup);
+                    if (it == groupFinder.end()) {  uniqueOTU = false;  } //you have sequences from a group you don't want
+                    else {  atLeastOne[seqGroup]++;  }
+                               }
+                       }
+                       
+                       //make sure you have at least one seq from each group you want
+                       bool sharedByAll = true;
+                       map<string, int>::iterator it2;
+                       for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) {
+                               if (it2->second == 0) {  sharedByAll = false;   }
+                       }
+                       
+                       //if the user wants unique bins and this is unique then print
+                       //or this the user wants shared bins and this bin is shared then print
+                       if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) {
+                               
+                               wroteSomething = true;
+                               num++;
+                               
+                               //output list of names
+                               for (set<string>::iterator itNames = namesOfGroupsInThisBin.begin(); itNames != namesOfGroupsInThisBin.end(); itNames++) {
+                                       outNames << (*itNames) << endl;
+                               }
+                       }
+               }
+               outNames.close();
+               
+               if (!wroteSomething) {
+                       m->mothurRemove(outputFileNames);
+                       string outputString = "\t" + toString(num) + " - No otus shared by groups";
+                       
+                       string groupString = "";
+                       for (int h = 0; h < Groups.size(); h++) {
+                               groupString += "  " + Groups[h];
+                       }
+                       
+                       outputString += groupString + ".";
+                       m->mothurOut(outputString); m->mothurOutEndLine();
+               }else {
+                       m->mothurOut("\t" + toString(num)); m->mothurOutEndLine();
+                       outputNames.push_back(outputFileNames);
+                       if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); }
+                       else { outputTypes["accnos"].push_back(outputFileNames); }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetSharedOTUCommand", "process");
+               exit(1);
+       }
+}
  
  //**********************************************************************************************************************
diff --git a/getsharedotucommand.h b/getsharedotucommand.h

index 583c2e001a00607cd97771e8fdfa42cbd8866c9f..f03b5ddcf6b5de00eba42210d39c74e46848d73c 100644 (file)
--- a/getsharedotucommand.h
+++ b/getsharedotucommand.h
@@ -14,6 +14,8 @@
  #include "listvector.hpp"
  #include "sequence.hpp"
  #include "groupmap.h"
+#include "sharedrabundvector.h"
+#include "inputdata.h"
  
  //**********************************************************************************************************************
  class GetSharedOTUCommand : public Command {
@@ -44,7 +46,7 @@ class GetSharedOTUCommand : public Command {
                 GroupMap* groupMap;
                 
                 set<string> labels;
-               string fastafile, label, groups, listfile, groupfile, output, userGroups, outputDir, format;
+               string fastafile, label, groups, listfile, groupfile, sharedfile, output, userGroups, outputDir, format;
                 bool abort, allLines, unique;
                 vector<string> Groups;
                 map<string, string> groupFinder;
@@ -53,6 +55,8 @@ class GetSharedOTUCommand : public Command {
                 vector<string> outputNames;
                 
                 int process(ListVector*);
+        int process(vector<SharedRAbundVector*>&);
+        int runShared();
                 
  };
  //**********************************************************************************************************************
diff --git a/homovacommand.cpp b/homovacommand.cpp

index 49b8b64a0ebea8d5c6dae20118a19e0b814806b1..488b68e0b6b69a8eb8e351d39be9ff09193fb36f 100644 (file)
--- a/homovacommand.cpp
+++ b/homovacommand.cpp
@@ -308,7 +308,7 @@ double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> >
                         vector<double> ssWithinRandVector;
                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
                         double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
-                       if(bValueRand > bValueOrig){    counter++;      }
+                       if(bValueRand >= bValueOrig){   counter++;      }
                 }
                 
                 double pValue = (double) counter / (double) iters;
diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp

index 704e752341320dd75b9f213658d74530583ec9cb..74e3e2bc9f0bdedc88f90e7f923eafdb511470d6 100644 (file)
--- a/parsefastaqcommand.cpp
+++ b/parsefastaqcommand.cpp
@@ -16,6 +16,7 @@ vector<string> ParseFastaQCommand::setParameters(){
                 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
                 CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
+        CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
                 CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
          CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -39,6 +40,7 @@ string ParseFastaQCommand::getHelpString(){
                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
          helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
          helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
+        helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n";
                 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
                 helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
                 return helpString;
@@ -132,7 +134,11 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                         fasta = m->isTrue(temp); 
  
                         temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
-                       qual = m->isTrue(temp); 
+                       qual = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "pacbio", false);      if(temp == "not found"){        temp = "F";     }
+                       pacbio = m->isTrue(temp);
+
                         
              format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
              
@@ -209,22 +215,33 @@ int ParseFastaQCommand::execute(){
                         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
                         if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                         
-                       //print sequence info to files
-                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-                       
-                       if (qual) { 
-                               vector<int> qualScores = convertQual(quality);
+            vector<int> qualScores;
+            if (qual) {
+                               qualScores = convertQual(quality);
                                 outQual << ">" << name << endl;
                                 for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
                                 outQual << endl;
                         }
+            
+            if (m->control_pressed) { break; }
+            
+            if (pacbio) {
+                if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
+                for (int i = 0; i < qualScores.size(); i++) {
+                    if (qualScores[i] == 0){ sequence[i] = 'N'; }
+                }
+            }
+            
+                       //print sequence info to files
+                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
+                       
                 }
                 
                 in.close();
                 if (fasta)      { outFasta.close();     }
                 if (qual)       { outQual.close();      }
                 
-               if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
+               if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
                 
                 //set fasta file as new current fastafile
                 string current = "";
diff --git a/parsefastaqcommand.h b/parsefastaqcommand.h

index cb86bd66c0ae8597f605006893fd70371d16b5fc..5ae6b9c482fe6b4022bb4d4c5d4ae2e79ea3aad8 100644 (file)
--- a/parsefastaqcommand.h
+++ b/parsefastaqcommand.h
@@ -36,7 +36,7 @@ private:
  
         vector<string> outputNames;     
         string outputDir, fastaQFile, format;
-       bool abort, fasta, qual;
+       bool abort, fasta, qual, pacbio;
         
         vector<int> convertQual(string);
      vector<char> convertTable;
diff --git a/qualityscores.h b/qualityscores.h

index 87699739d34ad012b121a295be53f976b122a5b7..500d3e98db79b934effee830b65f54afb0023af3 100644 (file)
--- a/qualityscores.h
+++ b/qualityscores.h
@@ -38,7 +38,7 @@ public:
         void updateReverseMap(vector<vector<int> >&, int, int, int);
      void setName(string n); 
      void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
-    
+    vector<int> getScores() { return qScores; }
         
  private:
         
diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp

index e48d50794d15b4b77dae6a15134d9bec54c8a911..ecd6f0ceb723290337f5856f724710eff7a751ee 100644 (file)
--- a/seqerrorcommand.cpp
+++ b/seqerrorcommand.cpp
@@ -407,6 +407,13 @@ int SeqErrorCommand::execute(){
                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");
                 m->mothurOutEndLine();
                 
+        //set fasta file as new current fastafile
+               string current = "";
+               itTypes = outputTypes.find("errorseq");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+               }
+        
                 m->mothurOutEndLine();
                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp

index 58380942b021bfa370d71e5103118826924b5f67..3abc7609ac85187513f416c378225e726c8bd197 100644 (file)
--- a/trimflowscommand.cpp
+++ b/trimflowscommand.cpp
@@ -491,7 +491,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                  if (pos == string::npos) {             
                      flowData.printFlows(trimFlowFile);
                      
-                    if(fasta)  {       currSeq.printSequence(fastaFile);       }
+                    if(fasta)  { currSeq.printSequence(fastaFile);     }
                      
                      if(allFiles){
                          ofstream output;
diff --git a/trimflowscommand.h b/trimflowscommand.h

index 065e6b4532e071383c39c54e38047b01a2b45344..f5493eb7517db72c20048da57080a587ae675f7e 100644 (file)
--- a/trimflowscommand.h
+++ b/trimflowscommand.h
@@ -224,7 +224,7 @@ static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){
                                 
                                 flowData.printFlows(trimFlowFile);
                                 
-                               if(pDataArray->fasta)   {       currSeq.printSequence(fastaFile);       }
+                               if(pDataArray->fasta)   {       currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile);   }
                                 
                                 if(pDataArray->allFiles){
                                         ofstream output;
diff --git a/trimoligos.cpp b/trimoligos.cpp

index e0f093c011e5c3ce97f62a22421e4145bded0325..91bdb83ad2d3a7f9e97713dba66709018d89908c 100644 (file)
--- a/trimoligos.cpp
+++ b/trimoligos.cpp
@@ -18,6 +18,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
         try {
                 m = MothurOut::getInstance();
          paired = false;
+        trashCode = "";
                 
                 pdiffs = p;
                 bdiffs = b;
@@ -73,6 +74,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<
          ldiffs = l;
          sdiffs = s;
          paired = true;
+        trashCode = "";
                 
          maxFBarcodeLength = 0;
          for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
@@ -147,6 +149,7 @@ TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, v
                 primers = pr;
                 revPrimer = r;
          paired = false;
+        trashCode = "";
          
          maxFBarcodeLength = 0;
          for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
@@ -178,6 +181,7 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
         try {
                 
                 string rawSequence = seq.getUnaligned();
+        trashCode = "";
                 
                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
                         string oligo = it->first;
@@ -200,7 +204,7 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
                 
          primerStart = 0; primerEnd = 0;
          //if you don't want to allow for diffs
-               if (pdiffs == 0) { return false;  }
+               if (pdiffs == 0) { trashCode = "f"; return false;  }
                 else { //try aligning and see if you can find it
                                                 
                         //can you find the barcode
@@ -253,11 +257,12 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
                         
              if (alignment != NULL) {  delete alignment;  }
              
-                       if(minDiff > pdiffs)    {       primerStart = 0; primerEnd = 0; return false;           }       //no good matches
-                       else if(minCount > 1)   {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
-                       else{  return true; }
+                       if(minDiff > pdiffs)    {       primerStart = 0; primerEnd = 0; trashCode = "f"; return false;          }       //no good matches
+                       else if(minCount > 1)   {       primerStart = 0; primerEnd = 0; trashCode = "f"; return false;  }       //can't tell the difference between multiple primers
+                       else{  trashCode = ""; return true; }
                 }
-       
+        
+        trashCode = "f";
          primerStart = 0; primerEnd = 0;
                 return false;
                 
@@ -270,7 +275,7 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
  //******************************************************************/
  bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
         try {
-               
+               trashCode = "";
                 string rawSequence = seq.getUnaligned();
                 
                 for(int i=0;i<revPrimer.size();i++){
@@ -292,6 +297,7 @@ bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
              }
                 }       
                 
+        trashCode = "r";
          primerStart = 0; primerEnd = 0;
                 return false;
         }
@@ -304,7 +310,7 @@ bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
  
  int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
         try {
-               
+               trashCode = "";
          if (paired) {  int success = stripPairedBarcode(seq, qual, group);  return success; }
          
                 string rawSequence = seq.getUnaligned();
@@ -327,12 +333,13 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                                 }
                                 
                                 success = 0;
-                               break;
+                trashCode = "";
+                               return success;
                         }
                 }
                 
                 //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
+               if (bdiffs == 0) { trashCode = "b"; return success;  }
                 
                 else { //try aligning and see if you can find it
                         Alignment* alignment;
@@ -350,7 +357,7 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                                 //                              int length = oligo.length();
                                 
                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
-                                       success = bdiffs + 10;
+                                       success = bdiffs + 10; trashCode = "b";
                                         break;
                                 }
                                 
@@ -385,8 +392,8 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                                 
                         }
                         
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
+                       if(minDiff > bdiffs)    {       trashCode = "b"; success = minDiff;             }       //no good matches
+                       else if(minCount > 1)   {       trashCode = "b"; success = bdiffs + 100;        }       //can't tell the difference between multiple barcodes
                         else{                                                                                                   //use the best match
                                 group = minGroup;
                                 seq.setUnaligned(rawSequence.substr(minPos));
@@ -395,6 +402,7 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                                         qual.trimQScores(minPos, -1);
                                 }
                                 success = minDiff;
+                trashCode = "";
                         }
                         
                         if (alignment != NULL) {  delete alignment;  }
@@ -412,6 +420,7 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
  //*******************************************************************/
  int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
         try {
+        trashCode = "";
                 //look for forward barcode
                 string rawFSequence = forwardSeq.getUnaligned();
          string rawRSequence = reverseSeq.getUnaligned();
@@ -424,25 +433,33 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
              
                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
-                               break;  
+                trashCode += "b";
+                break;
                         }
              if(rawRSequence.length() < roligo.length()){       //let's just assume that the barcodes are the same length
                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                trashCode += "d";
                                 break;  
                         }
                         
-                       if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
-                               group = it->first;
-                               forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
-                               success = 0;
-                               break;
-                       }
+                       if(compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+                if (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {
+                    group = it->first;
+                    forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
+                    reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                    success = 0; trashCode = "";
+                    return success;
+                }
+            }else {
+                trashCode = "b";
+                if (!compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) { trashCode += "d";  }
+            }
                 }
                 
                 //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
+               if (bdiffs == 0) {  return success;  }
                 else { //try aligning and see if you can find it
+            trashCode = "";
                         Alignment* alignment;
                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
                         else{ alignment = NULL; } 
@@ -518,8 +535,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                         }
              
                         //cout << minDiff << '\t' << minCount << '\t' << endl;
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else{   
+                       if(minDiff > bdiffs)    {       trashCode = "b"; minFGroup.clear(); success = minDiff;          }       //no good matches
+                       //else{
                  //check for reverse match
                  if (alignment != NULL) {  delete alignment;  }
                  
@@ -592,7 +609,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                      cout << endl;
                  }
                  cout << endl;*/
-                if(minDiff > bdiffs)   {       success = minDiff;              }       //no good matches
+                if(minDiff > bdiffs)   {       trashCode += "d"; success = minDiff;            }       //no good matches
                  else  {  
                      bool foundMatch = false;
                      vector<int> matches;
@@ -618,13 +635,16 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                          forwardSeq.setUnaligned(rawFSequence.substr(fStart));
                          reverseSeq.setUnaligned(rawRSequence.substr(rStart));
                          success = minDiff;
-                    }else { success = bdiffs + 100;    }
+                    }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100; }
                  }
-                       }
+                       //}
                         
                         if (alignment != NULL) {  delete alignment;  }
                 }
                 
+        //catchall for case I didn't think of
+        if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
+        
                 return success;
                 
         }
@@ -637,6 +657,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
  //*******************************************************************/
  int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
         try {
+        trashCode = "";
+        
                 //look for forward barcode
                 string rawFSequence = forwardSeq.getUnaligned();
          string rawRSequence = reverseSeq.getUnaligned();
@@ -649,27 +671,36 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
              
                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                trashCode = "b";
                                 break;  
                         }
              if(rawRSequence.length() < roligo.length()){       //let's just assume that the barcodes are the same length
                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                trashCode += "d";
                                 break;  
                         }
                         
-                       if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
-                               group = it->first;
-                               forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
-                forwardQual.trimQScores(foligo.length(), -1);
-                reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
-                               success = 0;
-                               break;
+                       if(compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+                if (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {
+                    group = it->first;
+                    forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
+                    reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                    forwardQual.trimQScores(foligo.length(), -1);
+                    reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+                    success = 0; trashCode = "";
+                    return success;
+                }
+            }else {
+                trashCode = "b";
+                if (!compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {  trashCode += "d";
+                }
                         }
                 }
                 
                 //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
+               if (bdiffs == 0) { return success;  }
                 else { //try aligning and see if you can find it
+            trashCode = "";
                         Alignment* alignment;
                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
                         else{ alignment = NULL; } 
@@ -745,8 +776,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                         }
              
                         //cout << minDiff << '\t' << minCount << '\t' << endl;
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else{   
+                       if(minDiff > bdiffs)    {       trashCode = "b"; minFGroup.clear(); success = minDiff;          }       //no good matches
+                       //else{
                  //check for reverse match
                  if (alignment != NULL) {  delete alignment;  }
                  
@@ -819,7 +850,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                   cout << endl;
                   }
                   cout << endl;*/
-                if(minDiff > bdiffs)   {       success = minDiff;              }       //no good matches
+                if(minDiff > bdiffs)   {       trashCode = "d"; success = minDiff;             }       //no good matches
                  else  {  
                      bool foundMatch = false;
                      vector<int> matches;
@@ -847,13 +878,16 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                          forwardQual.trimQScores(fStart, -1);
                          reverseQual.trimQScores(rStart, -1);
                          success = minDiff;
-                    }else { success = bdiffs + 100;    }
+                    }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100; }
                  }
-                       }
+                       //}
                         
                         if (alignment != NULL) {  delete alignment;  }
                 }
                 
+        //catchall for case I didn't think of
+        if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
+        
                 return success;
                 
         }
@@ -866,6 +900,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
  //*******************************************************************/
  int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
         try {
+        trashCode = "";
+        
                 //look for forward barcode
                 string rawSeq = seq.getUnaligned();
                 int success = bdiffs + 1;       //guilty until proven innocent
@@ -878,28 +914,36 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
              //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
                         if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                trashCode = "b";
                                 break;
                         }
              if(rawSeq.length() < roligo.length()){     //let's just assume that the barcodes are the same length
                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                trashCode += "d";
                                 break;
                         }
                         
-                       if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
-                               group = it->first;
-                string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
-                seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
-                if(qual.getName() != ""){
-                    qual.trimQScores(-1, rawSeq.length()-roligo.length());
-                    qual.trimQScores(foligo.length(), -1);
+                       if(compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) { //find forward
+                if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { //find reverse
+                    group = it->first;
+                    string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+                    seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+                    if(qual.getName() != ""){
+                        qual.trimQScores(-1, rawSeq.length()-roligo.length());
+                        qual.trimQScores(foligo.length(), -1);
+                    }
+                    success = 0;
+                    trashCode = "";
+                    return success;
                  }
-                               success = 0;
-                               break;
+            }else {
+                trashCode = "b";
+                               if (!compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { trashCode += "d"; }
                         }
                 }
                 //cout << "success=" << success << endl;
                 //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
+               if (bdiffs == 0) { return success;  }
                 else { //try aligning and see if you can find it
                         Alignment* alignment;
                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
@@ -976,8 +1020,8 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                         }
              
                         //cout << minDiff << '\t' << minCount << '\t' << endl;
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else{
+                       if(minDiff > bdiffs)    {       trashCode = "b"; minFGroup.clear(); success = minDiff;          }       //no good matches
+                       //else{
                  //check for reverse match
                  if (alignment != NULL) {  delete alignment;  }
                  
@@ -1052,7 +1096,7 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                   cout << endl;
                   }
                   cout << endl;*/
-                if(minDiff > bdiffs)   {       success = minDiff;              }       //no good matches
+                if(minDiff > bdiffs)   {       trashCode += "d"; success = minDiff;            }       //no good matches
                  else  {
                      bool foundMatch = false;
                      vector<int> matches;
@@ -1083,13 +1127,16 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                          }
                          success = minDiff;
                          //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
-                    }else { success = bdiffs + 100;    }
+                    }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100; }
                  }
-                       }
+                       //}
                         
                         if (alignment != NULL) {  delete alignment;  }
                 }
                 
+        //catchall for case I didn't think of
+        if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
+
                 return success;
                 
         }
@@ -1102,6 +1149,7 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
  //*******************************************************************/
  int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
         try {
+        trashCode = "";
                 //look for forward 
                 string rawSeq = seq.getUnaligned();
                 int success = pdiffs + 1;       //guilty until proven innocent
@@ -1115,30 +1163,37 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
              //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
                         if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                trashCode = "p";
                                 break;
                         }
              if(rawSeq.length() < roligo.length()){     //let's just assume that the barcodes are the same length
                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                trashCode += "q";
                                 break;
                         }
                         
-                       if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
-                               group = it->first;
-                if (!keepForward) { 
+            if(compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) { //find forward
+                if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { //find reverse
+                    group = it->first;
                      string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
                      seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
                      if(qual.getName() != ""){
                          qual.trimQScores(-1, rawSeq.length()-roligo.length());
                          qual.trimQScores(foligo.length(), -1);
                      }
+                    success = 0;
+                    trashCode = "";
+                    return success;
                  }
-                               success = 0;
-                               break;
+            }else {
+                trashCode = "b";
+                               if (!compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { trashCode += "d"; }
                         }
-               }
+
+        }
                 //cout << "success=" << success << endl;
                 //if you found the barcode or if you don't want to allow for diffs
-               if ((pdiffs == 0) || (success == 0)) { return success;  }
+               if (pdiffs == 0) { return success;  }
                 else { //try aligning and see if you can find it
                         Alignment* alignment;
                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
@@ -1215,8 +1270,8 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                         }
              
                         //cout << minDiff << '\t' << minCount << '\t' << endl;
-                       if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
-                       else{
+                       if(minDiff > pdiffs)    {       trashCode = "p"; minFGroup.clear(); success = minDiff;          }       //no good matches
+                       //else{
                  //check for reverse match
                  if (alignment != NULL) {  delete alignment;  }
                  
@@ -1291,7 +1346,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                   cout << endl;
                   }
                   cout << endl;*/
-                if(minDiff > pdiffs)   {       success = minDiff;              }       //no good matches
+                if(minDiff > pdiffs)   { trashCode += "q";     success = minDiff;              }       //no good matches
                  else  {
                      bool foundMatch = false;
                      vector<int> matches;
@@ -1324,13 +1379,16 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                          }
                          success = minDiff;
                          //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
-                    }else { success = pdiffs + 100;    }
+                    }else { if (trashCode == "") { trashCode = "pq"; } success = pdiffs + 100; }
                  }
-                       }
+                       //}
                         
                         if (alignment != NULL) {  delete alignment;  }
                 }
                 
+        //catchall for case I didn't think of
+        if ((trashCode == "") && (success > pdiffs)) { trashCode = "pq"; }
+        
                 return success;
                 
         }
diff --git a/trimoligos.h b/trimoligos.h

index df4643c87f1d5b7edd75c5384e7ff2a144a392c7..f3e695406d44a880ed2adc0029e730b6502185fc 100644 (file)
--- a/trimoligos.h
+++ b/trimoligos.h
@@ -56,11 +56,13 @@ class TrimOligos {
          bool findReverse(Sequence&, int&, int&);
      
          string reverseOligo(string);
+        string getTrashCode() { return trashCode; }
                                 
         
         private:
                 int pdiffs, bdiffs, ldiffs, sdiffs;
          bool paired;
+        string trashCode;
         
                 map<string, int> barcodes;
                 map<string, int> primers;
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp

index 709202f7d690c17e8a6edb7f5a7812047aec9177..935b9a9297e08d1f50e89d4b9a059cf9cbe8e54a 100644 (file)
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -684,16 +684,14 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
              //create reoriented primer and barcode pairs
              map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
              for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
-                cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
-                cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
-                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+                  oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
                  rpairedPrimers[it->first] = tempPair;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
              }
              for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
-                cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
-                cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
-                oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
                  rpairedBarcodes[it->first] = tempPair;
+                 //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
              }
              rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
          }
@@ -716,13 +714,15 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
  
                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+            Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
              
-                       QualityScores currQual;
+                       QualityScores currQual; QualityScores savedQual;
                         if(qFileName != ""){
                                 currQual = QualityScores(qFile);  m->gobble(qFile);
+                savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
                  //cout << currQual.getName() << endl;
                         }
-                       
+                         
                         string origSeq = currSeq.getUnaligned();
                         if (origSeq != "") {
                                 
@@ -751,7 +751,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                  
                                 if(numFPrimers != 0){
                                         success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
-                                       if(success > pdiffs)            {       trashCode += 'f';       }
+                                       if(success > pdiffs)            {
+                        //if (pairedOligos) {  trashCode += trimOligos->getTrashCode(); }
+                        //else {  trashCode += 'f';  }
+                        trashCode += 'f';
+                    }
                                         else{ currentSeqsDiffs += success;  }
                                 }
                                 
@@ -771,17 +775,21 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                      int thisPrimerIndex = 0;
                      
                      if(numBarcodes != 0){
-                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+                        thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
                          if(thisSuccess > bdiffs)               {       thisTrashCode += 'b';   }
                          else{ thisCurrentSeqsDiffs += thisSuccess;  }
                      }
                      
                      if(numFPrimers != 0){
-                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
-                        if(thisSuccess > pdiffs)               {       thisTrashCode += 'f';   }
+                        thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
+                            if(thisSuccess > pdiffs)           {
+                            //if (pairedOligos) {  thisTrashCode += rtrimOligos->getTrashCode(); }
+                            //else {  thisTrashCode += 'f';  }
+                            thisTrashCode += 'f'; 
+                        }
                          else{ thisCurrentSeqsDiffs += thisSuccess;  }
                      }
-                    
+                   
                      if (thisCurrentSeqsDiffs > tdiffs) {       thisTrashCode += 't';   }
                      
                      if (thisTrashCode == "") { 
@@ -790,9 +798,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                          currentSeqsDiffs = thisCurrentSeqsDiffs;
                          barcodeIndex = thisBarcodeIndex;
                          primerIndex = thisPrimerIndex;
-                        currSeq.reverseComplement();
+                        savedSeq.reverseComplement();
+                        currSeq.setAligned(savedSeq.getAligned());
                          if(qFileName != ""){
-                            currQual.flipQScores();
+                            savedQual.flipQScores();
+                            currQual.setScores(savedQual.getScores());
                          }
                      }
                  }
@@ -1555,12 +1565,14 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                         // get rest of line in case there is a primer name
                                         while (!inOligos.eof()) {
                                                 char c = inOligos.get();
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                 else if (c == 32 || c == 9){;} //space or tab
                                                 else {  group += c;  }
                                         }
                      
                      oligosPair newPrimer(oligo, roligo);
+                    
+                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
                                         
                                         //check for repeat barcodes
                      string tempPair = oligo+roligo;
@@ -1588,7 +1600,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                      string temp = "";
                      while (!inOligos.eof())    {
                                                 char c = inOligos.get();
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                 else if (c == 32 || c == 9){;} //space or tab
                                                 else {  temp += c;  }
                                         }
@@ -1604,7 +1616,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                              if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
                          }
                          
-                        oligosPair newPair(oligo, reverseOligo(reverseBarcode));
+                        reverseBarcode = reverseOligo(reverseBarcode);
+                        oligosPair newPair(oligo, reverseBarcode);
                          
                          if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
                          
diff --git a/trimseqscommand.h b/trimseqscommand.h

index 4d0a6112cb6e288600ae944e9340563f5973da47..de3d839901762893a7ce2c4d41ce5f9db2f9cbaa 100644 (file)
--- a/trimseqscommand.h
+++ b/trimseqscommand.h
@@ -261,7 +261,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                 
          TrimOligos* trimOligos = NULL;
          int numBarcodes = pDataArray->barcodes.size();
-        if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); }
+        if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
          else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
          
          TrimOligos* rtrimOligos = NULL;
@@ -283,7 +283,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                 for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
                                    
                         if (pDataArray->m->control_pressed) {
-                delete trimOligos;
+                delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; }
                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
                                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
                  if(pDataArray->qFileName != "")        {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
@@ -299,11 +299,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         int currentSeqsDiffs = 0;
              
                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+            Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
                         
-                       QualityScores currQual;
+                       QualityScores currQual; QualityScores savedQual;
                         if(pDataArray->qFileName != ""){
                                 currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
+                savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
                         }
+              
                         
                         string origSeq = currSeq.getUnaligned();
                         if (origSeq != "") {
@@ -353,13 +356,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                      int thisPrimerIndex = 0;
                      
                      if(numBarcodes != 0){
-                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+                        thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
                          if(thisSuccess > pDataArray->bdiffs)           {       thisTrashCode += 'b';   }
                          else{ thisCurrentSeqsDiffs += thisSuccess;  }
                      }
                      
                      if(pDataArray->numFPrimers != 0){
-                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
+                        thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
                          if(thisSuccess > pDataArray->pdiffs)           {       thisTrashCode += 'f';   }
                          else{ thisCurrentSeqsDiffs += thisSuccess;  }
                      }
@@ -372,6 +375,12 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                          currentSeqsDiffs = thisCurrentSeqsDiffs;
                          barcodeIndex = thisBarcodeIndex;
                          primerIndex = thisPrimerIndex;
+                        savedSeq.reverseComplement();
+                        currSeq.setAligned(savedSeq.getAligned());
+                        if(pDataArray->qFileName != ""){
+                            savedQual.flipQScores();
+                            currQual.setScores(savedQual.getScores());
+                        }
                      }
                  }
  
@@ -567,12 +576,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         }
                         
                         //report progress
-                       if((i) % 1000 == 0){    pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }
+                       if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                         
                 }
                 //report progress
                 if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                 
+        if (pDataArray->reorient) { delete rtrimOligos; }
                 delete trimOligos;
                 inFASTA.close();
                 trimFASTAFile.close();
author	Pat Schloss <pschloss@umich.edu>
	Tue, 12 Mar 2013 12:22:50 +0000 (08:22 -0400)
committer	Pat Schloss <pschloss@umich.edu>
	Tue, 12 Mar 2013 12:22:50 +0000 (08:22 -0400)
amovacommand.cpp		patch \| blob \| history
chimerauchimecommand.cpp		patch \| blob \| history
chimerauchimecommand.h		patch \| blob \| history
counttable.cpp		patch \| blob \| history
flowdata.cpp		patch \| blob \| history
getsharedotucommand.cpp		patch \| blob \| history
getsharedotucommand.h		patch \| blob \| history
homovacommand.cpp		patch \| blob \| history
parsefastaqcommand.cpp		patch \| blob \| history
parsefastaqcommand.h		patch \| blob \| history
qualityscores.h		patch \| blob \| history
seqerrorcommand.cpp		patch \| blob \| history
trimflowscommand.cpp		patch \| blob \| history
trimflowscommand.h		patch \| blob \| history
trimoligos.cpp		patch \| blob \| history
trimoligos.h		patch \| blob \| history
trimseqscommand.cpp		patch \| blob \| history
trimseqscommand.h		patch \| blob \| history