]> git.donarmstrong.com Git - mothur.git/commitdiff
added sets to amova and homova commands. added oligos to make.contigs. added metadat...
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 22 Oct 2012 15:40:41 +0000 (11:40 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 22 Oct 2012 15:40:41 +0000 (11:40 -0400)
37 files changed:
alignmentdb.cpp
alignnode.cpp
amovacommand.cpp
amovacommand.h
chimerauchimecommand.cpp
classify.cpp
classifyseqscommand.cpp
classifyseqscommand.h
consensusseqscommand.cpp
cooccurrencecommand.cpp
corraxescommand.cpp
formatcolumn.cpp
formatcolumn.h
formatmatrix.h
formatphylip.cpp
formatphylip.h
getoturepcommand.cpp
heatmapcommand.cpp
homovacommand.cpp
homovacommand.h
makebiomcommand.cpp
makebiomcommand.h
makecontigscommand.cpp
makecontigscommand.h
mothurout.cpp
mothurout.h
otuassociationcommand.cpp
otuassociationcommand.h
phylotypecommand.cpp
seqsummarycommand.cpp
sequence.cpp
sharedrabundfloatvector.cpp
subsample.cpp
trimoligos.cpp
trimoligos.h
trimseqscommand.cpp
unifracweightedcommand.cpp

index f59c5db7d6e6ef2d039ccdda8b48a6ea3fb6428a..4ce76ea487d4c34f9b9a8f5f56658a2352ff7721 100644 (file)
@@ -42,12 +42,14 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                        
                        numSeqs = templateSequences.size();
                        if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  }
-
+            
                }else {
                        int start = time(NULL);
                        m->mothurOutEndLine();
                        m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
-                       
+                       bool aligned = false;
+            int tempLength = 0;
+            
                        #ifdef USE_MPI  
                                int pid, processors;
                                vector<unsigned long long> positions;
@@ -102,6 +104,9 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                                                
                                                //save longest base
                                                if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
+                        if (tempLength != 0) {
+                            if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
+                        }else { tempLength = temp.getAligned().length(); }
                                        }
                                }
                                
@@ -125,6 +130,10 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                                        
                                        //save longest base
                                        if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
+                    
+                    if (tempLength != 0) {
+                        if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
+                    }else { tempLength = temp.getAligned().length(); }
                                }
                        }
                        fastaFile.close();
index ccd8fb03342d99b954c4f1fbb6efc3639ea3311b..08ba40c9573c0a018a1e633e4f6cd8b3da2db253 100755 (executable)
@@ -7,7 +7,7 @@
  *
  */
 
-#include "alignNode.h"
+#include "alignnode.h"
 #include "taxonomynode.h"
 
 #include "bayesian.h"
index c4260fb096dd8b0b253f5239f7ce265376d24ed5..f36027f9acb625ab9c02e906a90d5716aa66aa58 100644 (file)
 #include "amovacommand.h"
 #include "readphylipvector.h"
 #include "groupmap.h"
+#include "sharedutilities.h"
 
 
 //**********************************************************************************************************************
 vector<string> AmovaCommand::setParameters(){  
        try {
                CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
+        CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
                CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
                CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
@@ -37,9 +39,10 @@ string AmovaCommand::getHelpString(){
                string helpString = "";
                helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.";
                helpString += "The amova command outputs a .amova file.";
-               helpString += "The amova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required, unless you have valid current files.";
+               helpString += "The amova command parameters are phylip, iters, sets and alpha.  The phylip and design parameters are required, unless you have valid current files.";
                helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.";
                helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.";
+        helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
                helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000.";
                helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).";
                helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).";
@@ -161,6 +164,12 @@ AmovaCommand::AmovaCommand(string option) {
                        temp = validParameter.validFile(parameters, "alpha", false);
                        if (temp == "not found") { temp = "0.05"; }
                        m->mothurConvert(temp, experimentwiseAlpha); 
+            
+            string sets = validParameter.validFile(parameters, "sets", false);                 
+                       if (sets == "not found") { sets = ""; }
+                       else { 
+                               m->splitAtDash(sets, Sets);
+                       }
                }
        }
        catch(exception& e) {
@@ -184,7 +193,32 @@ int AmovaCommand::execute(){
                //read in distance matrix and square it
                ReadPhylipVector readMatrix(phylipFileName);
                vector<string> sampleNames = readMatrix.read(distanceMatrix);
-               
+        
+        if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
+            SharedUtil util; 
+            vector<string> dGroups = designMap->getNamesOfGroups();
+            util.setGroups(Sets, dGroups);  
+
+            for(int i=0;i<distanceMatrix.size();i++){
+                
+                if (m->control_pressed) { delete designMap; return 0; }
+                
+                string group = designMap->getGroup(sampleNames[i]);
+                
+                if (group == "not found") {
+                    m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+                }else if (!m->inUsersGroups(group, Sets)){  //not in set we want remove it
+                    //remove from all other rows
+                    for(int j=0;j<distanceMatrix.size();j++){
+                        distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
+                    }
+                    distanceMatrix.erase(distanceMatrix.begin()+i);
+                    sampleNames.erase(sampleNames.begin()+i);
+                    i--;
+                }
+            }
+        }
+        
                for(int i=0;i<distanceMatrix.size();i++){
                        for(int j=0;j<i;j++){
                                distanceMatrix[i][j] *= distanceMatrix[i][j];   
index 6b3c83a4fb09c51f23b01ff38abeb2d939e8e79a..17dbae650912c5f4c21e0c8924dce3a676cc4fda 100644 (file)
@@ -38,7 +38,7 @@ private:
        map<string, vector<int> > getRandomizedGroups(map<string, vector<int> >);
 
        bool abort;
-       vector<string> outputNames;
+       vector<string> outputNames, Sets;
 
        string outputDir, inputDir, designFileName, phylipFileName;
        GroupMap* designMap;
index ae011906ded5c95b3aec8bfb57047e27ae42392b..6f7ba106ead04e9ccd3b7b290dce4c3a55dabd20 100644 (file)
@@ -1452,15 +1452,21 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                        
                        string name = "";
                        string chimeraFlag = "";
-                       in >> chimeraFlag >> name;
-                       
-                       //fix name if needed
-                       if (templatefile == "self") { 
-                               name = name.substr(0, name.length()-1); //rip off last /
-                               name = name.substr(0, name.find_last_of('/'));
+                       //in >> chimeraFlag >> name;
+                       
+            string line = m->getline(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+            if (pieces.size() > 2) { 
+                name = pieces[1];
+                //fix name if needed
+                if (templatefile == "self") { 
+                    name = name.substr(0, name.length()-1); //rip off last /
+                    name = name.substr(0, name.find_last_of('/'));
+                }
+                
+                chimeraFlag = pieces[pieces.size()-1];
                        }
-                       
-                       for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
+                       //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
                        m->gobble(in);
                        
                        if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
index 8aa3cdb381ed7a389667ce61962d47cefac15ddd..15ef0aa75dfc5793d1d1bef68996f477134e99a3 100644 (file)
@@ -23,7 +23,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); }
                
                taxFile = tfile;
-               readTaxonomy(taxFile);  
+               
                int numSeqs = 0;
                
                if (tempFile == "saved") {
@@ -74,11 +74,6 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                        
                        database->setNumSeqs(numSeqs);
                        
-                       //sanity check
-                       bool okay = phyloTree->ErrorCheck(names);
-                       
-                       if (!okay) { m->control_pressed = true; }
-                       
                        m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine();  
                        
                }else {
@@ -221,18 +216,19 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                fastaFile.close();
                        }
        #endif  
-               
+               
                        database->setNumSeqs(names.size());
                        
-                       //sanity check
-                       bool okay = phyloTree->ErrorCheck(names);
-                       
-                       if (!okay) { m->control_pressed = true; }
-                       
                        m->mothurOut("DONE."); m->mothurOutEndLine();
                        m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
                }
-
+        
+        readTaxonomy(taxFile);
+        
+        //sanity check
+        bool okay = phyloTree->ErrorCheck(names);
+        
+        if (!okay) { m->control_pressed = true; }
        }
        catch(exception& e) {
                m->errorOut(e, "Classify", "generateDatabaseAndNames");
@@ -299,9 +295,13 @@ int Classify::readTaxonomy(string file) {
                        iss >> name; m->gobble(iss);
             iss >> taxInfo;
             if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); }
-                       taxonomy[name] = taxInfo;
-                       phyloTree->addSeqToTree(name, taxInfo);
-               }
+                       if (m->inUsersGroups(name, names)) {
+                taxonomy[name] = taxInfo;
+                phyloTree->addSeqToTree(name, taxInfo);
+            }else {
+                m->mothurOut("[WARNING]: " + name + " is in your taxonomy file and not in your reference file, ignoring.\n");
+            }          
+        }
                
                MPI_File_close(&inMPI);
                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
@@ -309,7 +309,16 @@ int Classify::readTaxonomy(string file) {
         
         taxonomy.clear(); 
         m->readTax(file, taxonomy);
-        for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) {  phyloTree->addSeqToTree(itTax->first, itTax->second);  }
+        map<string, string> tempTaxonomy;
+        for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) {  
+            if (m->inUsersGroups(itTax->first, names)) {
+                phyloTree->addSeqToTree(itTax->first, itTax->second); 
+                tempTaxonomy[itTax->first] = itTax->second;
+            }else {
+                m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n");
+            }
+        }
+        taxonomy = tempTaxonomy;
 #endif 
                phyloTree->assignHeirarchyIDs(0);
                
index 36bccc2b205635c6b1b0df872332534d615e3bb1..7f262bf9019de7e26d611e9d8b6c37c9f47e7cc6 100644 (file)
@@ -658,7 +658,6 @@ int ClassifySeqsCommand::execute(){
                        }
                        
                        outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
-                       outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
                        outputNames.push_back(taxSummary);      outputTypes["taxsummary"].push_back(taxSummary);
                        
                        int start = time(NULL);
@@ -782,7 +781,9 @@ int ClassifySeqsCommand::execute(){
                        }
 #endif
                        
-                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); }
+                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
+                outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
+            }else { m->mothurRemove(newaccnosFile); }
 
                m->mothurOutEndLine();
                m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
@@ -895,12 +896,12 @@ int ClassifySeqsCommand::execute(){
                        #ifdef USE_MPI  
                                }
                        #endif
-
-                       m->mothurOutEndLine();
-                       m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-                       m->mothurOutEndLine();
                }
+        
+        m->mothurOutEndLine();
+        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+        for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+        m->mothurOutEndLine();
                
                //set taxonomy file as new current taxonomyfile
                string current = "";
index 0cffec6948a83849a078fbef99b81cb5fe7b4433..87d4f51cf78230f6ab301e1d72462a461b9f9da9 100644 (file)
@@ -166,10 +166,11 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                
                //make classify
                Classify* myclassify;
+        string outputMethodTag = pDataArray->method + ".";
                if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);             }
                else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);                           }
         else if(pDataArray->method == "zap"){  
-            outputMethodTag = search + "_" + outputMethodTag;
+            outputMethodTag = pDataArray->search + "_" + outputMethodTag;
             if (pDataArray->search == "kmer") {   myclassify = new KmerTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->kmerSize, pDataArray->cutoff); }
             else {  myclassify = new AlignTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->cutoff);  }
         }
index 94d66827068338c9b254f8e3aa30d9ec65301db9..36df8c043086552dee3c85d120f5dc1d70a71159 100644 (file)
@@ -610,10 +610,10 @@ char ConsensusSeqsCommand::getBase(vector<int> counts, int size){  //A,T,G,C,Gap
                
                //zero out counts that don't make the cutoff
                float percentage = (100.0 - cutoff) / 100.0;
-               int zeroCutoff = percentage * size;
-               
+        
                for (int i = 0; i < counts.size(); i++) {
-                       if (counts[i] < zeroCutoff) { counts[i] = 0; }
+            float countPercentage = counts[i] / (float) size;
+                       if (countPercentage < percentage) { counts[i] = 0; }
                }
                
                //any
index 00de4d6ee612439732207872eba327b8a03f699e..ef8c9eddce64e93092df8ed1031b37684f09fe38 100644 (file)
@@ -321,11 +321,11 @@ int CooccurrenceCommand::getCooccurrence(vector<SharedRAbundVector*>& thisLookUp
         int nrows = numOTUS;//rows of inital matrix
         int ncols = thisLookUp.size();//groups
         double initscore = 0.0;
-        
+       
         vector<double> stats;
-        double probabilityMatrix[ncols * nrows];
+        vector<double> probabilityMatrix; probabilityMatrix.resize(ncols * nrows, 0);
         vector<vector<int> > nullmatrix(nrows, vector<int>(ncols, 0));
-        
+       
         TrialSwap2 trial;
         
         int n = accumulate( columntotal.begin(), columntotal.end(), 0 );
index f8083cc01c10a23b4626c6ea2ccd285a15f59c87..c4454a5181c719302011e6e98410408826183735 100644 (file)
@@ -925,16 +925,11 @@ int CorrAxesCommand::getMetadata(){
                m->openInputFile(metadatafile, in);
                
                string headerLine = m->getline(in); m->gobble(in);
-               istringstream iss (headerLine,istringstream::in);
-               
-               //read the first label, because it refers to the groups
-               string columnLabel;
-               iss >> columnLabel; m->gobble(iss); 
+               vector<string> pieces = m->splitWhiteSpace(headerLine);
                
                //save names of columns you are reading
-               while (!iss.eof()) {
-                       iss >> columnLabel; m->gobble(iss);
-                       metadataLabels.push_back(columnLabel);
+               for (int i = 1; i < pieces.size(); i++) {
+                       metadataLabels.push_back(pieces[i]);
                }
                int count = metadataLabels.size();
                        
index 6b29f90464f3259c6c625421d9ee1cadbc8b2ef8..109c09ccb7c99af1f82e976078509a9239ce326b 100644 (file)
@@ -179,6 +179,169 @@ int FormatColumnMatrix::read(NameAssignment* nameMap){
                exit(1);
        }
 }
+/***********************************************************************/
+
+int FormatColumnMatrix::read(CountTable* nameMap){
+       try {           
+        
+               string firstName, secondName;
+               float distance;
+               int nseqs = nameMap->size();
+        
+               list = new ListVector(nameMap->getListVector());
+        
+               Progress* reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
+        
+               int lt = 1;
+               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
+               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
+        
+               //need to see if this is a square or a triangular matrix...
+               
+               ofstream out;
+               string tempOutFile = filename + ".temp";
+               m->openOutputFile(tempOutFile, out);
+        
+               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
+            
+                       if (m->control_pressed) { out.close();  m->mothurRemove(tempOutFile); fileHandle.close();  delete reading; return 0; }
+            
+                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
+            
+                       int itA = nameMap->get(firstName);
+                       int itB = nameMap->get(secondName);
+            
+                       if (distance == -1) { distance = 1000000; }
+            
+                       if((distance < cutoff) && (itA != itB)){
+                               if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
+                                       refRow = itA;
+                                       refCol = itB;
+                                       
+                                       //making it square
+                                       out << itA << '\t' << itB << '\t' << distance << endl;
+                                       out << itB << '\t' << itA << '\t' << distance << endl;
+                               }
+                               else if(refRow == itA && refCol == itB){        lt = 0;         } //you are square
+                               else if(refRow == itB && refCol == itA){        lt = 0;         }  //you are square
+                               else{   //making it square
+                                       out << itA << '\t' << itB << '\t' << distance << endl;
+                                       out << itB << '\t' << itA << '\t' << distance << endl;
+                               }
+                               
+                               reading->update(itA * nseqs / 2);
+                       }
+                       m->gobble(fileHandle);
+               }
+               out.close();
+               fileHandle.close();
+        
+               string squareFile;
+               if(lt == 0){  // oops, it was square
+                       squareFile = filename;
+               }else{ squareFile = tempOutFile; }
+               
+               //sort file by first column so the distances for each row are together
+               string outfile = m->getRootName(squareFile) + "sorted.dist.temp";
+               
+               //use the unix sort 
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        string command = "sort -n " + squareFile + " -o " + outfile;
+        system(command.c_str());
+#else //sort using windows sort
+        string command = "sort " + squareFile + " /O " + outfile;
+        system(command.c_str());
+#endif
+               
+               if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
+        
+               //output to new file distance for each row and save positions in file where new row begins
+               ifstream in;
+               m->openInputFile(outfile, in);
+               
+               distFile = outfile + ".rowFormatted";
+               m->openOutputFile(distFile, out);
+               
+               rowPos.resize(nseqs, -1);
+               int currentRow;
+               int first, second;
+               float dist;
+               map<int, float> rowMap;
+               map<int, float>::iterator itRow;
+               
+               //get first currentRow
+               in >> first;
+               currentRow = first;
+               
+               string firstString = toString(first);
+               for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
+               
+               while(!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
+                       
+                       in >> first >> second >> dist; m->gobble(in);
+                       
+                       if (first != currentRow) {
+                               //save position in file of each new row
+                               rowPos[currentRow] = out.tellp();
+                               
+                               out << currentRow << '\t' << rowMap.size() << '\t';
+                               
+                               for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                                       out << itRow->first << '\t' << itRow->second << '\t';
+                               }
+                               out << endl;
+                               
+                               currentRow = first;
+                               rowMap.clear();
+                               
+                               //save row you just read
+                               if (dist < cutoff) {
+                                       rowMap[second] = dist;
+                               }
+                       }else{
+                               if (dist < cutoff) {
+                                       rowMap[second] = dist;
+                               }
+                       }
+               }
+               
+               //print last Row
+               //save position in file of each new row
+               rowPos[currentRow] = out.tellp();
+               
+               out << currentRow << '\t' << rowMap.size() << '\t';
+               
+               for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                       out << itRow->first << '\t' << itRow->second << '\t';
+               }
+               out << endl;
+               
+               
+               in.close();
+               out.close();
+               
+               if (m->control_pressed) {  m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile);  delete reading; return 0; }
+               
+               m->mothurRemove(tempOutFile);
+               m->mothurRemove(outfile);
+               
+               reading->finish();
+               
+               delete reading;
+               list->setLabel("0");
+               
+               if (m->control_pressed) {  m->mothurRemove(distFile);  return 0; }
+        
+               return 1;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FormatColumnMatrix", "read");
+               exit(1);
+       }
+}
 
 /***********************************************************************/
 FormatColumnMatrix::~FormatColumnMatrix(){}
index 0fe96ed237d58ccccef2bca45c5a9fcf7d6f358b..157a28962b506137b7f06bce0dcf785e46d13d37 100644 (file)
@@ -19,6 +19,7 @@ public:
        FormatColumnMatrix(string);
        ~FormatColumnMatrix();
        int read(NameAssignment*);
+    int read(CountTable*);
        
 private:
        ifstream fileHandle;
index 7e7a99c1dbcf4bf495ad7b1a7c7caec49bef3e90..859b78ab2b72569344ccf7c9485de76333ca5ae3 100644 (file)
@@ -13,6 +13,7 @@
 #include "mothur.h"
 #include "listvector.hpp"
 #include "nameassignment.hpp"
+#include "counttable.h"
 
 
 //**********************************************************************************************************************
@@ -61,6 +62,7 @@ public:
        virtual ~FormatMatrix() {}
        
        virtual int read(NameAssignment*){ return 1; }
+    virtual int read(CountTable*){ return 1; }
        
        void setCutoff(float c)                 {       cutoff = c;                     }
        ListVector* getListVector()             {       return list;            }
index 60591178836ee793a83cbd3ec75776b463af4ead..57bc5d7389b2ad8ed980b3612f950c46ce135af7 100644 (file)
@@ -30,9 +30,14 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
                        else { convert(numTest, nseqs); }
                
-                                               
-                       list = new ListVector(nseqs);
-                       list->set(0, name);
+            if(nameMap == NULL){
+                list = new ListVector(nseqs);
+                list->set(0, name);
+            }
+            else{
+                list = new ListVector(nameMap->getListVector());
+                if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+            }                  
                        
                        char d;
                        while((d=fileHandle.get()) != EOF){
@@ -71,7 +76,9 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                
                                        fileHandle >> name;
                                        
-                                       list->set(i, name);
+                    if(nameMap == NULL){ list->set(i, name); }
+                    else { if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+                    }
                                        
                                        for(int j=0;j<i;j++){
                                        
@@ -185,7 +192,9 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                for(int i=0;i<nseqs;i++){
                                        fileHandle >> name;                
                                                                        
-                                       list->set(i, name);
+                                       if(nameMap == NULL){ list->set(i, name); }
+                    else { if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+                    }
                                        
                                        for(int j=0;j<nseqs;j++){
                                                if (m->control_pressed) {  fileHandle.close(); out.close(); m->mothurRemove(distFile);   delete reading; return 0; }
@@ -235,6 +244,236 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                 exit(1);
        }
 }
+/***********************************************************************/
+//not using nameMap
+int FormatPhylipMatrix::read(CountTable* nameMap){
+       try {
+        
+        float distance;
+        int square, nseqs;
+        string name;
+        ofstream out;
+        
+        string numTest;
+        fileHandle >> numTest >> name;
+        
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+               
+        if(nameMap == NULL){
+            list = new ListVector(nseqs);
+            list->set(0, name);
+        }
+        else{
+            list = new ListVector(nameMap->getListVector());
+            nameMap->get(name);
+        }                      
+        
+        char d;
+        while((d=fileHandle.get()) != EOF){
+            
+            if(isalnum(d)){  //you are square
+                square = 1;
+                fileHandle.close();  //reset file
+                
+                //open and get through numSeqs, code below formats rest of file
+                m->openInputFile(filename, fileHandle);
+                fileHandle >> nseqs; m->gobble(fileHandle);
+                
+                distFile = filename + ".rowFormatted";
+                m->openOutputFile(distFile, out);
+                break;
+            }
+            if(d == '\n'){
+                square = 0;
+                break;
+            }
+        }
+        
+        Progress* reading;
+        reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
+        
+        //lower triangle, so must go to column then formatted row file
+        if(square == 0){
+            int  index = 0;
+            
+            ofstream outTemp;
+            string tempFile = filename + ".temp";
+            m->openOutputFile(tempFile, outTemp);
+            
+            //convert to square column matrix
+            for(int i=1;i<nseqs;i++){
+                               
+                fileHandle >> name;
+                
+                if(nameMap == NULL){ list->set(i, name); }
+                else { nameMap->get(name); }
+                
+                
+                for(int j=0;j<i;j++){
+                                       
+                    if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close();  delete reading; return 0; }
+                    
+                    fileHandle >> distance;
+                    
+                    if (distance == -1) { distance = 1000000; }
+                    
+                    if(distance < cutoff){
+                        outTemp << i << '\t' << j << '\t' << distance << endl;
+                        outTemp << j << '\t' << i << '\t' << distance << endl;
+                    }
+                    index++;
+                    reading->update(index);
+                }
+            }
+            outTemp.close();
+            
+            //format from square column to rowFormatted
+            //sort file by first column so the distances for each row are together
+            string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
+            
+            //use the unix sort 
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            string command = "sort -n " + tempFile + " -o " + outfile;
+            system(command.c_str());
+#else //sort using windows sort
+            string command = "sort " + tempFile + " /O " + outfile;
+            system(command.c_str());
+#endif
+            
+            if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile);  delete reading; return 0; }
+            
+            //output to new file distance for each row and save positions in file where new row begins
+            ifstream in;
+            m->openInputFile(outfile, in);
+            
+            distFile = outfile + ".rowFormatted";
+            m->openOutputFile(distFile, out);
+            
+            rowPos.resize(nseqs, -1);
+            int currentRow;
+            int first, second;
+            float dist;
+            map<int, float> rowMap;
+            map<int, float>::iterator itRow;
+            
+            //get first currentRow
+            in >> first;
+            currentRow = first;
+            
+            string firstString = toString(first);
+            for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
+            
+            while(!in.eof()) {
+                if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile);  delete reading; return 0; }
+                
+                in >> first >> second >> dist; m->gobble(in);
+                
+                if (first != currentRow) {
+                    //save position in file of each new row
+                    rowPos[currentRow] = out.tellp();
+                    
+                    out << currentRow << '\t' << rowMap.size() << '\t';
+                    
+                    for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                        out << itRow->first << '\t' << itRow->second << '\t';
+                    }
+                    out << endl;
+                    
+                    currentRow = first;
+                    rowMap.clear();
+                    
+                    //save row you just read
+                    rowMap[second] = dist;
+                    
+                    index++;
+                    reading->update(index);
+                }else{
+                    rowMap[second] = dist;
+                }
+            }
+            
+            //print last Row
+            //save position in file of each new row
+            rowPos[currentRow] = out.tellp();
+            
+            out << currentRow << '\t' << rowMap.size() << '\t';
+            
+            for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                out << itRow->first << '\t' << itRow->second << '\t';
+            }
+            out << endl;
+            
+            in.close();
+            out.close();
+            
+            m->mothurRemove(tempFile);
+            m->mothurRemove(outfile);
+            
+            if (m->control_pressed) {  m->mothurRemove(distFile);   delete reading; return 0; }
+            
+        }
+        else{ //square matrix convert directly to formatted row file
+            int index = nseqs;
+            map<int, float> rowMap;
+            map<int, float>::iterator itRow;
+            rowPos.resize(nseqs, -1);
+            
+            for(int i=0;i<nseqs;i++){
+                fileHandle >> name;                
+                
+                if(nameMap == NULL){ list->set(i, name); }
+                else { nameMap->get(name); }
+                
+                for(int j=0;j<nseqs;j++){
+                    if (m->control_pressed) {  fileHandle.close(); out.close(); m->mothurRemove(distFile);   delete reading; return 0; }
+                    
+                    fileHandle >> distance;
+                                       
+                    if (distance == -1) { distance = 1000000; }
+                    
+                    if((distance < cutoff) && (j != i)){
+                        rowMap[j] = distance;
+                    }
+                    index++;
+                    reading->update(index);
+                }
+                
+                m->gobble(fileHandle);
+                
+                //save position in file of each new row
+                rowPos[i] = out.tellp();
+                
+                //output row to file
+                out << i << '\t' << rowMap.size() << '\t';
+                for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                    out << itRow->first << '\t' << itRow->second << '\t';
+                }
+                out << endl;
+                
+                //clear map for new row's info
+                rowMap.clear();
+            }
+        }
+        reading->finish();
+        delete reading;
+        fileHandle.close();
+        out.close();
+        
+        if (m->control_pressed) { m->mothurRemove(distFile);  return 0; }
+        
+        list->setLabel("0");
+        
+        return 1;
+        
+        
+       }
+       catch(exception& e) {
+        m->errorOut(e, "FormatPhylipMatrix", "read");
+        exit(1);
+       }
+}
+
 /***********************************************************************/
 FormatPhylipMatrix::~FormatPhylipMatrix(){}
 /***********************************************************************/
index b920d088cba0673f9c597ad870307084de551afd..c081305aa46ff9b33a7ab85b1fedf17114db9bed 100644 (file)
@@ -20,6 +20,8 @@ public:
        FormatPhylipMatrix(string);
        ~FormatPhylipMatrix();
        int read(NameAssignment*);
+    int read(CountTable*);
+    
 private:
        ifstream fileHandle;
        string filename;
index 9f4dd54491bab042ae243998de20559b03285dfb..1d26b47b39c3ee998fb018bad12fb4e904b1e333 100644 (file)
@@ -574,6 +574,8 @@ int GetOTURepCommand::readDist() {
                 readMatrix->read(nameMap);
             }else if (countfile != "") {
                 readMatrix->read(&ct);
+            }else {
+               readMatrix->read(nameMap); 
             }
                        
                        if (m->control_pressed) { delete readMatrix; return 0; }
@@ -614,9 +616,11 @@ int GetOTURepCommand::readDist() {
             if(namefile != ""){        
                 nameMap = new NameAssignment(namefile);
                 nameMap->readMap();
-                readMatrix->read(nameMap);
+                formatMatrix->read(nameMap);
             }else if (countfile != "") {
-                readMatrix->read(&ct);
+                formatMatrix->read(&ct);
+            }else {
+                formatMatrix->read(nameMap); 
             }
                        
                        if (m->control_pressed) { delete formatMatrix;  return 0; }
index d160525c70174131e8d32ac9803f5bcc756cb80f..441eaf8a4dc4e77beb36c7e39c102f9aa13f0d53 100644 (file)
@@ -173,27 +173,27 @@ HeatMapCommand::HeatMapCommand(string option) {
                        
                        //check for required parameters
                        listfile = validParameter.validFile(parameters, "list", true);
-                       if (listfile == "not open") { listfile = ""; abort = true; }
+                       if (listfile == "not open") { abort = true; }
                        else if (listfile == "not found") { listfile = ""; }
                        else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
                        
                        sabundfile = validParameter.validFile(parameters, "sabund", true);
-                       if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
+                       if (sabundfile == "not open") {  abort = true; }        
                        else if (sabundfile == "not found") { sabundfile = ""; }
                        else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
                        
                        rabundfile = validParameter.validFile(parameters, "rabund", true);
-                       if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
+                       if (rabundfile == "not open") {  abort = true; }        
                        else if (rabundfile == "not found") { rabundfile = ""; }
                        else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
                        
                        sharedfile = validParameter.validFile(parameters, "shared", true);
-                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
+                       if (sharedfile == "not open") { abort = true; } 
                        else if (sharedfile == "not found") { sharedfile = ""; }
                        else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
                        
                        relabundfile = validParameter.validFile(parameters, "relabund", true);
-                       if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
+                       if (relabundfile == "not open") {  abort = true; }      
                        else if (relabundfile == "not found") { relabundfile = ""; }
                        else {  format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); }
                        
index 006f1e4d5ee0b046e3216b0f470ce5d5c6200aa0..6496cea746a1cf6895005564798a4c5cb909f88d 100644 (file)
 #include "homovacommand.h"
 #include "groupmap.h"
 #include "readphylipvector.h"
+#include "sharedutilities.h"
 
 //**********************************************************************************************************************
 vector<string> HomovaCommand::setParameters(){ 
        try {
                CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
                CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
+        CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
                CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -36,9 +38,10 @@ string HomovaCommand::getHelpString(){
                string helpString = "";
                helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
                helpString += "The homova command outputs a .homova file. \n";
-               helpString += "The homova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required, unless valid current files exist.\n";
+               helpString += "The homova command parameters are phylip, iters, sets and alpha.  The phylip and design parameters are required, unless valid current files exist.\n";
                helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
                helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
+        helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
                helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n";
                helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
                helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
@@ -162,6 +165,12 @@ HomovaCommand::HomovaCommand(string option) {
                        temp = validParameter.validFile(parameters, "alpha", false);
                        if (temp == "not found") { temp = "0.05"; }
                        m->mothurConvert(temp, experimentwiseAlpha); 
+            
+            string sets = validParameter.validFile(parameters, "sets", false);                 
+                       if (sets == "not found") { sets = ""; }
+                       else { 
+                               m->splitAtDash(sets, Sets);
+                       }
                }
                
        }
@@ -187,6 +196,31 @@ int HomovaCommand::execute(){
                ReadPhylipVector readMatrix(phylipFileName);
                vector<string> sampleNames = readMatrix.read(distanceMatrix);
                
+        if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
+            SharedUtil util; 
+            vector<string> dGroups = designMap->getNamesOfGroups();
+            util.setGroups(Sets, dGroups);  
+            
+            for(int i=0;i<distanceMatrix.size();i++){
+                
+                if (m->control_pressed) { delete designMap; return 0; }
+                
+                string group = designMap->getGroup(sampleNames[i]);
+                
+                if (group == "not found") {
+                    m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+                }else if (!m->inUsersGroups(group, Sets)){  //not in set we want remove it
+                    //remove from all other rows
+                    for(int j=0;j<distanceMatrix.size();j++){
+                        distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
+                    }
+                    distanceMatrix.erase(distanceMatrix.begin()+i);
+                    sampleNames.erase(sampleNames.begin()+i);
+                    i--;
+                }
+            }
+        }
+
                for(int i=0;i<distanceMatrix.size();i++){
                        for(int j=0;j<i;j++){
                                distanceMatrix[i][j] *= distanceMatrix[i][j];   
index b6f21097b4fde750934526285f0ab5efe9849ca0..9398627105ad53f7644c47a0db4b0e969f77862f 100644 (file)
@@ -41,7 +41,7 @@ private:
        map<string, vector<int> > getRandomizedGroups(map<string, vector<int> >);
 
        bool abort;
-       vector<string> outputNames;
+       vector<string> outputNames, Sets;
 
        string outputDir, inputDir, designFileName, phylipFileName;
        GroupMap* designMap;
index 68e70ee3e48e58a89f75ef7547d7409620947cb2..6385adef0fd1c36f73e509604f10b082a75e54a5 100644 (file)
@@ -9,6 +9,7 @@
 #include "makebiomcommand.h"
 #include "sharedrabundvector.h"
 #include "inputdata.h"
+#include "sharedutilities.h"
 
 //taken from http://biom-format.org/documentation/biom_format.html
 /* Minimal Sparse 
@@ -93,6 +94,7 @@ vector<string> MakeBiomCommand::setParameters(){
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
         CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcontaxonomy);
+        CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pmetadata);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -112,11 +114,12 @@ vector<string> MakeBiomCommand::setParameters(){
 string MakeBiomCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The make.biom command parameters are shared, contaxonomy, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n";
+               helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
                helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
                helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
-        helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled.\n";
+        helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
+        helpString += "The metadata parameter is used to provide experimental parameters to the columns.  Things like 'sample1 gut human_gut'. \n";
                helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
                helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
                helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
@@ -211,6 +214,14 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
                                }
+                
+                it = parameters.find("metadata");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
+                               }
                        }
             
                        //get shared file
@@ -231,6 +242,9 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                        if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
                        else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
 
+            metadatafile = validParameter.validFile(parameters, "metadata", true);
+                       if (metadatafile == "not found") {  metadatafile = "";  }
+                       else if (metadatafile == "not open") { metadatafile = ""; abort = true; }
             
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
@@ -280,6 +294,8 @@ int MakeBiomCommand::execute(){
             labels.insert(lastLabel);
         }
                
+        getSampleMetaData(lookup);
+        
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
@@ -423,13 +439,13 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
                     {"id":"Sample6", "metadata":null}
                     ],*/
         
-        string colBack = "\", \"metadata\":null}";
+        string colBack = "\", \"metadata\":";
         out << spaces + "\"columns\":[\n";
         for (int i = 0; i < lookup.size()-1; i++) {
             if (m->control_pressed) {  out.close(); return 0; }
-            out << rowFront << lookup[i]->getGroup() << colBack << ",\n";
+            out << rowFront << lookup[i]->getGroup() << colBack << sampleMetadata[i] << "},\n";
         }
-        out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << "\n" + spaces + "],\n";
+        out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << sampleMetadata[lookup.size()-1] << "}\n" + spaces + "],\n";
         
         out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n";
         out <<  spaces + "\"shape\": [" << m->currentBinLabels.size() << "," << lookup.size() << "],\n";
@@ -608,6 +624,81 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
        }
 
 }
+//**********************************************************************************************************************
+int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
+       try {
+        
+        if (metadatafile == "") { for (int i = 0; i < lookup.size(); i++) {  sampleMetadata.push_back("null");  } }
+        else {
+            ifstream in;
+            m->openInputFile(metadatafile, in);
+            
+            vector<string> groupNames, metadataLabels;
+            map<string, vector<string> > lines;
+            
+            string headerLine = m->getline(in); m->gobble(in);
+            vector<string> pieces = m->splitWhiteSpace(headerLine);
+            
+            //save names of columns you are reading
+            for (int i = 1; i < pieces.size(); i++) {
+                metadataLabels.push_back(pieces[i]);
+            }
+            int count = metadataLabels.size();
+                       
+            vector<string> groups = m->getGroups();
+            
+            //read rest of file
+            while (!in.eof()) {
+                
+                if (m->control_pressed) { in.close(); return 0; }
+                
+                string group = "";
+                in >> group; m->gobble(in);
+                groupNames.push_back(group);
+                
+                string line = m->getline(in); m->gobble(in);
+                vector<string> thisPieces = m->splitWhiteSpaceWithQuotes(line);
+        
+                if (thisPieces.size() != count) { m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); }
+                else {  if (m->inUsersGroups(group, groups)) { lines[group] = thisPieces; } }
+                
+                m->gobble(in);
+            }
+            in.close();
+            
+            map<string, vector<string> >::iterator it;
+            for (int i = 0; i < lookup.size(); i++) {
+                
+                if (m->control_pressed) { return 0; }
+                
+                it = lines.find(lookup[i]->getGroup());
+                
+                if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + lookup[i]->getGroup() + ", quitting.\n"); m->control_pressed = true; }
+                else {
+                    vector<string> values = it->second;
+                    
+                    string data = "{";
+                    for (int j = 0; j < metadataLabels.size()-1; j++) { 
+                        values[j] = m->removeQuotes(values[j]); 
+                        data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\", "; 
+                    }
+                    values[metadataLabels.size()-1] = m->removeQuotes(values[metadataLabels.size()-1]);
+                    data += "\"" + metadataLabels[metadataLabels.size()-1] + "\":\"" + values[metadataLabels.size()-1] + "\"}";
+                    sampleMetadata.push_back(data);
+                }
+            }
+        }
+        
+        return 0;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeBiomCommand", "getSampleMetaData");
+               exit(1);
+       }
+    
+}
+
 /**************************************************************************************************/
 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
 vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
index 8a458fe33a398de607ffe05e48a5950b9368353c..ee9bd1c99cf415c93b57fc85087a06d870298115 100644 (file)
@@ -35,8 +35,8 @@ public:
        
 private:
     
-       string sharedfile, contaxonomyfile, groups, outputDir, format, label;
-       vector<string> outputNames, Groups;
+       string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label;
+       vector<string> outputNames, Groups, sampleMetadata;
        set<string> labels;
     
        bool abort, allLines;
@@ -44,6 +44,7 @@ private:
     int getBiom(vector<SharedRAbundVector*>&);
     vector<string> getMetaData(vector<SharedRAbundVector*>&);
     vector<string> parseTax(string tax, vector<string>& scores);
+    int getSampleMetaData(vector<SharedRAbundVector*>&);
 };
 
 
index 4ae25ce0e2623ec9db0dc6568a37df7e64382140..f2f6449a383cb58c746ea0ccee9fd2ed1483b246 100644 (file)
@@ -16,8 +16,8 @@ vector<string> MakeContigsCommand::setParameters(){
         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
-               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
+//        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
+//             CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
 
         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
@@ -220,16 +220,16 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
                        m->mothurConvert(temp, pdiffs);
             
           temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
-                       m->mothurConvert(temp, ldiffs);
//           temp = validParameter.validFile(parameters, "ldiffs", false);            if (temp == "not found") { temp = "0"; }
+//                     m->mothurConvert(temp, ldiffs);
             
           temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
-                       m->mothurConvert(temp, sdiffs);
//           temp = validParameter.validFile(parameters, "sdiffs", false);            if (temp == "not found") { temp = "0"; }
+//             m->mothurConvert(temp, sdiffs);
                        
                        temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
                        m->mothurConvert(temp, tdiffs);
                        
-                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
 
             temp = validParameter.validFile(parameters, "allfiles", false);            if (temp == "not found") { temp = "F"; }
                        allFiles = m->isTrue(temp);
@@ -261,15 +261,32 @@ int MakeContigsCommand::execute(){
     
         if (m->control_pressed) { return 0; }
         
-        string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("fasta");
-        string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("qfile");
+        vector<vector<string> > fastaFileNames;
+               vector<vector<string> > qualFileNames;
+        createGroup = false;
+        string outputGroupFileName;
+        if(oligosfile != ""){
+                       createGroup = getOligos(fastaFileNames, qualFileNames);
+            if (createGroup) { 
+                outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("group");
+                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
+            }
+               }
+        
+        string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("fasta");
+        string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("qfile");
+        string outScrapFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("fasta");
+        string outScrapQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("qfile");
+
         string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch");
         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
         outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
+        outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
+        outputNames.push_back(outScrapQualFile); outputTypes["qfile"].push_back(outScrapQualFile);
         outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
         
         m->mothurOut("Making contigs...\n"); 
-        createProcesses(files, outFastaFile, outQualFile, outMisMatchFile);
+        createProcesses(files, outFastaFile, outQualFile, outScrapFastaFile, outScrapQualFile, outMisMatchFile, fastaFileNames, qualFileNames);
         m->mothurOut("Done.\n");
         
         //remove temp fasta and qual files
@@ -277,8 +294,79 @@ int MakeContigsCommand::execute(){
         
         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); }  return 0; }
         
+        if(allFiles){
+                       map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
+                       map<string, string>::iterator it;
+                       set<string> namesToRemove;
+                       for(int i=0;i<fastaFileNames.size();i++){
+                               for(int j=0;j<fastaFileNames[0].size();j++){
+                                       if (fastaFileNames[i][j] != "") {
+                                               if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
+                                                       if(m->isBlank(fastaFileNames[i][j])){
+                                                               m->mothurRemove(fastaFileNames[i][j]);
+                                                               namesToRemove.insert(fastaFileNames[i][j]);
+
+                                m->mothurRemove(qualFileNames[i][j]);
+                                namesToRemove.insert(qualFileNames[i][j]);
+                                                       }else{  
+                                                               it = uniqueFastaNames.find(fastaFileNames[i][j]);
+                                                               if (it == uniqueFastaNames.end()) {     
+                                                                       uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
+                                                               }       
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+                       
+                       //remove names for outputFileNames, just cleans up the output
+                       vector<string> outputNames2;
+                       for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
+                       outputNames = outputNames2;
+                       
+            for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
+                ifstream in;
+                m->openInputFile(it->first, in);
+                
+                ofstream out;
+                string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
+                thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); 
+                m->openOutputFile(thisGroupName, out);
+                 
+                while (!in.eof()){
+                    if (m->control_pressed) { break; }
+                    
+                    Sequence currSeq(in); m->gobble(in);
+                    out << currSeq.getName() << '\t' << it->second << endl;  
+                }
+                in.close();
+                out.close();
+            }
+        }
+        
+        if (createGroup) {
+            ofstream outGroup;
+            m->openOutputFile(outputGroupFileName, outGroup);
+            for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
+                outGroup << itGroup->first << '\t' << itGroup->second << endl;
+            }
+            outGroup.close();
+        }
         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
         
+        if (m->control_pressed) {      for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
+        
+               //output group counts
+               m->mothurOutEndLine();
+               int total = 0;
+               if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
+               for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
+            total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
+               }
+               if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
+               
+               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
+        
         string currentFasta = "";
                itTypes = outputTypes.find("fasta");
                if (itTypes != outputTypes.end()) {
@@ -311,7 +399,7 @@ int MakeContigsCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputMisMatches) {
+int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
        try {
                int num = 0;
                vector<int> processIDS;
@@ -326,14 +414,52 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp");
+                vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+                               vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+                
+                               if(allFiles){
+                                       ofstream temp;
+                    
+                                       for(int i=0;i<tempFASTAFileNames.size();i++){
+                                               for(int j=0;j<tempFASTAFileNames[i].size();j++){
+                                                       if (tempFASTAFileNames[i][j] != "") {
+                                                               tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
+                                                               m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
+                                
+                                tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
+                                m->openOutputFile(tempPrimerQualFileNames[i][j], temp);                temp.close();
+                                                       }
+                                               }
+                                       }
+                               }
+
+                               num = driver(files[process], 
+                             outputFasta + toString(getpid()) + ".temp", 
+                             outputQual + toString(getpid()) + ".temp", 
+                             outputScrapFasta + toString(getpid()) + ".temp", 
+                             outputScrapQual + toString(getpid()) + ".temp",
+                             outputMisMatches + toString(getpid()) + ".temp",
+                             tempFASTAFileNames,
+                             tempPrimerQualFileNames);
                                
-                               //pass numSeqs to parent
-                               ofstream out;
-                               string tempFile = outputFasta + toString(getpid()) + ".num.temp";
-                               m->openOutputFile(tempFile, out);
-                               out << num << endl;
-                               out.close();
+                               //pass groupCounts to parent
+                ofstream out;
+                string tempFile = toString(getpid()) + ".num.temp";
+                m->openOutputFile(tempFile, out);
+                out << num << endl;
+                               if(createGroup){
+                                       out << groupCounts.size() << endl;
+                                       
+                                       for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
+                                               out << it->first << '\t' << it->second << endl;
+                                       }
+                    
+                    out << groupMap.size() << endl;
+                    for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
+                                               out << it->first << '\t' << it->second << endl;
+                                       }
+                               }
+                out.close();
                                
                                exit(0);
                        }else { 
@@ -343,8 +469,14 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                        }
                }
                
+        ofstream temp;
+               m->openOutputFile(outputFasta, temp);           temp.close();
+               m->openOutputFile(outputQual, temp);    temp.close();
+        m->openOutputFile(outputScrapFasta, temp);             temp.close();
+        m->openOutputFile(outputScrapQual, temp);              temp.close();
+        
                //do my part
-               num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
+               num = driver(files[processors-1], outputFasta, outputQual, outputScrapFasta, outputScrapQual, outputMisMatches, fastaFileNames, qualFileNames);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -353,11 +485,39 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                }
         
                for (int i = 0; i < processIDS.size(); i++) {
-                       ifstream in;
-                       string tempFile =  outputFasta + toString(processIDS[i]) + ".num.temp";
-                       m->openInputFile(tempFile, in);
-                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
-                       in.close(); m->mothurRemove(tempFile);
+            ifstream in;
+            string tempFile = toString(processIDS[i]) + ".num.temp";
+            m->openInputFile(tempFile, in);
+            int tempNum;
+            in >> tempNum; num += tempNum; m->gobble(in);
+            
+                       if(createGroup){
+                               string group;
+                               in >> tempNum; m->gobble(in);
+                               
+                               if (tempNum != 0) {
+                                       for (int j = 0; j < tempNum; j++) { 
+                        int groupNum;
+                                               in >> group >> groupNum; m->gobble(in);
+                        
+                                               map<string, int>::iterator it = groupCounts.find(group);
+                                               if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
+                                               else { groupCounts[it->first] += groupNum; }
+                                       }
+                               }
+                in >> tempNum; m->gobble(in);
+                if (tempNum != 0) {
+                                       for (int j = 0; j < tempNum; j++) { 
+                        string group, seqName;
+                                               in >> seqName >> group; m->gobble(in);
+                        
+                                               map<string, string>::iterator it = groupMap.find(seqName);
+                                               if (it == groupMap.end()) {     groupMap[seqName] = group; }
+                                               else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
+                                       }
+                               }
+                       }
+            in.close(); m->mothurRemove(tempFile);
         }
     #else
         
@@ -371,17 +531,67 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                HANDLE  hThreadArray[processors-1]; 
                
                //Create processor worker threads.
-               for( int i=0; i<processors-1; i++ ){
-                       string extension = toString(i) + ".temp";
-                       
-                       contigsData* tempcontig = new contigsData(files[i], (outputFasta + extension), (outputQual + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, i);
+               for( int h=0; h<processors-1; h++ ){
+                       string extension = "";
+                       if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
+            vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+            vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+            
+            if(allFiles){
+                ofstream temp;
+                
+                for(int i=0;i<tempFASTAFileNames.size();i++){
+                    for(int j=0;j<tempFASTAFileNames[i].size();j++){
+                        if (tempFASTAFileNames[i][j] != "") {
+                            tempFASTAFileNames[i][j] += extension;
+                            m->openOutputFile(tempFASTAFileNames[i][j], temp);                 temp.close();
+                            
+                            
+                            tempPrimerQualFileNames[i][j] += extension;
+                            m->openOutputFile(tempPrimerQualFileNames[i][j], temp);            temp.close();
+                        }
+                    }
+                }
+            }
+
+                                 
+                       contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputQual + extension), (outputScrapFasta + extension), (outputScrapQual + extension),(outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, barcodes, primers, tempFASTAFileNames, tempPrimerQualFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h);
                        pDataArray.push_back(tempcontig);
-                       processIDS.push_back(i);
             
-                       hThreadArray[i] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+                       hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
                }
+        
+        vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+        vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+
+        if(allFiles){
+            ofstream temp;
+            string extension = toString(processors-1) + ".temp";
+            
+            for(int i=0;i<tempFASTAFileNames.size();i++){
+                for(int j=0;j<tempFASTAFileNames[i].size();j++){
+                    if (tempFASTAFileNames[i][j] != "") {
+                        tempFASTAFileNames[i][j] += extension;
+                        m->openOutputFile(tempFASTAFileNames[i][j], temp);                     temp.close();
+                        
+                        
+                        tempPrimerQualFileNames[i][j] += extension;
+                        m->openOutputFile(tempPrimerQualFileNames[i][j], temp);                temp.close();
+                    }
+                }
+            }
+        }
+
+               //parent do my part
+               ofstream temp;
+               m->openOutputFile(outputFasta, temp);           temp.close();
+               m->openOutputFile(outputQual, temp);    temp.close();
+        m->openOutputFile(outputScrapFasta, temp);             temp.close();
+        m->openOutputFile(outputScrapQual, temp);              temp.close();
                
-        num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);          
+        
+        //do my part
+               num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputQual+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputScrapQual+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames);       
         
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -389,6 +599,16 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                        num += pDataArray[i]->count;
+            for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
+                map<string, int>::iterator it2 = groupCounts.find(it->first);
+                if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
+                else { groupCounts[it->first] += it->second; }
+            }
+            for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
+                map<string, string>::iterator it2 = groupMap.find(it->first);
+                if (it2 == groupMap.end()) {   groupMap[it->first] = it->second; }
+                else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
+            }
             CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
         }
@@ -402,8 +622,28 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                        m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
                        m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
             
+            m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
+                       m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
+                       
+                       m->appendFiles((outputScrapQual + toString(processIDS[i]) + ".temp"), outputScrapQual);
+                       m->mothurRemove((outputScrapQual + toString(processIDS[i]) + ".temp"));
+            
             m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
                        m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
+            
+            if(allFiles){
+                               for(int j=0;j<fastaFileNames.size();j++){
+                                       for(int k=0;k<fastaFileNames[j].size();k++){
+                                               if (fastaFileNames[j][k] != "") {
+                                                       m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
+                                                       m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
+                                                       
+                            m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
+                            m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
+                                               }
+                                       }
+                               }
+                       }
                }
                
                return num;
@@ -414,7 +654,7 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
        }
 }
 //**********************************************************************************************************************
-int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputMisMatches){
+int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames){
     try {
         
         Alignment* alignment;
@@ -435,26 +675,51 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
         m->openInputFile(thisrfastafile, inRFasta);
         m->openInputFile(thisrqualfile, inRQual);
         
-        ofstream outFasta, outQual, outMisMatch;
+        ofstream outFasta, outQual, outMisMatch, outScrapFasta, outScrapQual;
         m->openOutputFile(outputFasta, outFasta);
         m->openOutputFile(outputQual, outQual);
+        m->openOutputFile(outputScrapFasta, outScrapFasta);
+        m->openOutputFile(outputScrapQual, outScrapQual);
         m->openOutputFile(outputMisMatches, outMisMatch);
         outMisMatch << "Name\tLength\tMisMatches\n";
         
+        TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
+        
         while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
             
             if (m->control_pressed) { break; }
             
+            int success = 1;
+            string trashCode = "";
+            int currentSeqsDiffs = 0;
+
             //read seqs and quality info
             Sequence fSeq(inFFasta); m->gobble(inFFasta);
             Sequence rSeq(inRFasta); m->gobble(inRFasta);
             QualityScores fQual(inFQual); m->gobble(inFQual);
             QualityScores rQual(inRQual); m->gobble(inRQual);
             
+            int barcodeIndex = 0;
+            int primerIndex = 0;
+            
+            if(barcodes.size() != 0){
+                success = trimOligos.stripBarcode(fSeq, rSeq, fQual, rQual, barcodeIndex);
+                if(success > bdiffs)           {       trashCode += 'b';       }
+                else{ currentSeqsDiffs += success;  }
+            }
+            
+            if(primers.size() != 0){
+                success = trimOligos.stripForward(fSeq, rSeq, fQual, rQual, primerIndex);
+                if(success > pdiffs)           {       trashCode += 'f';       }
+                else{ currentSeqsDiffs += success;  }
+            }
+            
+            if (currentSeqsDiffs > tdiffs)     {       trashCode += 't';   }
+            
             //flip the reverse reads
             rSeq.reverseComplement();
             rQual.flipQScores();
-            
+
             //pairwise align
             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
@@ -462,7 +727,7 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
             fSeq.setAligned(alignment->getSeqAAln());
             rSeq.setAligned(alignment->getSeqBAln());
             int length = fSeq.getAligned().length();
-        
+            
             //traverse alignments merging into one contiguous seq
             string contig = "";
             vector<int> contigScores; 
@@ -471,8 +736,8 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
             string seq2 = rSeq.getAligned();
             vector<int> scores1 = fQual.getQualityScores();
             vector<int> scores2 = rQual.getQualityScores();
-
-           // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
+            
+            // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
             int overlapStart = fSeq.getStartPos();
             int seq2Start = rSeq.getStartPos();
             //bigger of the 2 starting positions is the location of the overlapping start
@@ -532,16 +797,60 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
                     contig += seq1[i];
                     contigScores.push_back(scores1[ABaseMap[i]]);
                 }
+                
+            }
 
+            if(trashCode.length() == 0){
+                if (createGroup) {
+                    if(barcodes.size() != 0){
+                        string thisGroup = barcodeNameVector[barcodeIndex];
+                        if (primers.size() != 0) { 
+                            if (primerNameVector[primerIndex] != "") { 
+                                if(thisGroup != "") {
+                                    thisGroup += "." + primerNameVector[primerIndex]; 
+                                }else {
+                                    thisGroup = primerNameVector[primerIndex]; 
+                                }
+                            } 
+                        }
+                        
+                        if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
+                        
+                        groupMap[fSeq.getName()] = thisGroup; 
+                        
+                        map<string, int>::iterator it = groupCounts.find(thisGroup);
+                        if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
+                        else { groupCounts[it->first] ++; }
+                        
+                    }
+                }
+                
+                if(allFiles){
+                    ofstream output;
+                    m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
+                    output << ">" << fSeq.getName() << endl << contig << endl;
+                    output.close();
+                    
+                    m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
+                    output << ">" << fSeq.getName() << endl;
+                    for (int i = 0; i < contigScores.size(); i++) { output << contigScores[i] << ' '; }
+                    output << endl;
+                    output.close();                                                    
+                }
+                
+                //output
+                outFasta << ">" << fSeq.getName() << endl << contig << endl;
+                outQual << ">" << fSeq.getName() << endl;
+                for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+                outQual << endl;
+                outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+            }else {
+                //output
+                outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
+                outScrapQual << ">" << fSeq.getName() << " | " << trashCode << endl;
+                for (int i = 0; i < contigScores.size(); i++) { outScrapQual << contigScores[i] << ' '; }
+                outScrapQual << endl;
             }
-            //if (num < 5) { cout << overlapStart << '\t' << overlapEnd << endl << seq1 << endl << seq2 << endl<< contig << endl; }
-            //output
-            outFasta << ">" << fSeq.getName() << endl << contig << endl;
-            outQual << ">" << fSeq.getName() << endl;
-            for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
-            outQual << endl;
-            outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
-            
             num++;
             
                        //report progress
@@ -557,10 +866,12 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
         inRQual.close();
         outFasta.close();
         outQual.close();
+        outScrapFasta.close();
+        outScrapQual.close();
         outMisMatch.close();
         delete alignment;
         
-        if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);  m->mothurRemove(outputMisMatches);}
+        if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);   m->mothurRemove(outputScrapQual); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);}
         
         return num;
     }
@@ -618,39 +929,57 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
         m->openInputFile(rfastqfile, inReverse);
         
         count = 0;
-        while ((!inForward.eof()) && (!inReverse.eof())) {
+        map<string, fastqRead> uniques;
+        map<string, fastqRead>::iterator itUniques;
+        while ((!inForward.eof()) || (!inReverse.eof())) {
             
             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
             
             //get a read from forward and reverse fastq files
-            fastqRead fread = readFastq(inForward);
-            fastqRead rread = readFastq(inReverse);
-            checkReads(fread, rread);
-            
-            if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
-            
-            //if the reads are okay write to output files
-            int process = count % processors;
+            bool ignoref, ignorer;
+            fastqRead thisFread, thisRread;
+            if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
+            else { ignoref = true; }
+            if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
+            else { ignorer = true; }
             
-            *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
-            *(tempfiles[process][1]) << ">" << fread.name << endl;
-            for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
-            *(tempfiles[process][1]) << endl;
-            *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
-            *(tempfiles[process][3]) << ">" << rread.name << endl;
-            for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
-            *(tempfiles[process][3]) << endl;
+            vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
             
-            count++;
-            
-            //report progress
-                       if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-                       
+            for (int i = 0; i < reads.size(); i++) {
+                fastqRead fread = reads[i].forward;
+                fastqRead rread = reads[i].reverse;
+                
+                if (checkReads(fread, rread)) {
+                    if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
+                    
+                    //if the reads are okay write to output files
+                    int process = count % processors;
+                    
+                    *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
+                    *(tempfiles[process][1]) << ">" << fread.name << endl;
+                    for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
+                    *(tempfiles[process][1]) << endl;
+                    *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
+                    *(tempfiles[process][3]) << ">" << rread.name << endl;
+                    for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
+                    *(tempfiles[process][3]) << endl;
+                    
+                    count++;
+                    
+                    //report progress
+                    if((count) % 10000 == 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                }
+            }
                }
                //report progress
                if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-               
         
+        if (uniques.size() != 0) {
+            for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
+                m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
+            }
+            m->mothurOutEndLine();
+        }
         
         //close files, delete ofstreams
         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
@@ -658,6 +987,7 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
         inReverse.close();
         
         //adjust for really large processors or really small files
+        if (count == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
         if (count < processors) { 
             for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
             files.resize(count);
@@ -672,43 +1002,110 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
     }
 }
 //**********************************************************************************************************************
-fastqRead MakeContigsCommand::readFastq(ifstream& in){
+vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques){
+    try {
+        vector<pairFastqRead> reads;
+        map<string, fastqRead>::iterator itUniques;
+            
+        if (!ignoref && !ignorer) {
+            if (forward.name == reverse.name) { 
+                pairFastqRead temp(forward, reverse);
+                reads.push_back(temp);
+            }else {
+                //look for forward pair
+                itUniques = uniques.find(forward.name);
+                if (itUniques != uniques.end()) {  //we have the pair for this read
+                    pairFastqRead temp(forward, itUniques->second);
+                    reads.push_back(temp);
+                    uniques.erase(itUniques);
+                }else { //save this read for later
+                    uniques[forward.name] = forward;
+                }
+                
+                //look for reverse pair
+                itUniques = uniques.find(reverse.name);
+                if (itUniques != uniques.end()) {  //we have the pair for this read
+                    pairFastqRead temp(itUniques->second, reverse);
+                    reads.push_back(temp);
+                    uniques.erase(itUniques);
+                }else { //save this read for later
+                    uniques[reverse.name] = reverse;
+                }
+            }
+        }else if (!ignoref && ignorer) { //ignore reverse keep forward
+            //look for forward pair
+            itUniques = uniques.find(forward.name);
+            if (itUniques != uniques.end()) {  //we have the pair for this read
+                pairFastqRead temp(forward, itUniques->second);
+                reads.push_back(temp);
+                uniques.erase(itUniques);
+            }else { //save this read for later
+                uniques[forward.name] = forward;
+            }
+
+        }else if (ignoref && !ignorer) { //ignore forward keep reverse
+            //look for reverse pair
+            itUniques = uniques.find(reverse.name);
+            if (itUniques != uniques.end()) {  //we have the pair for this read
+                pairFastqRead temp(itUniques->second, reverse);
+                reads.push_back(temp);
+                uniques.erase(itUniques);
+            }else { //save this read for later
+                uniques[reverse.name] = reverse;
+            }
+        }//else ignore both and do nothing
+        
+        return reads;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
     try {
         fastqRead read;
         
+        ignore = false;
+        
         //read sequence name
-        string name = m->getline(in); m->gobble(in);
-        if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
-        else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        string line = m->getline(in); m->gobble(in);
+        vector<string> pieces = m->splitWhiteSpace(line);
+        string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
+        if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
+        else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
         else { name = name.substr(1); }
         
         //read sequence
         string sequence = m->getline(in); m->gobble(in);
-        if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
         
         //read sequence name
-        string name2 = m->getline(in); m->gobble(in);
-        if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
-        else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
-        else { name2 = name2.substr(1);  }
+        line = m->getline(in); m->gobble(in);
+        pieces = m->splitWhiteSpace(line);
+        string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
+        if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
+        else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
+        else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
         
         //read quality scores
         string quality = m->getline(in); m->gobble(in);
-        if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; }
-        
+        if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
+         
         //sanity check sequence length and number of quality scores match
-        if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } }
-        if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
+        if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
         
         vector<int> qualScores;
-               int controlChar = int('@');
+               int controlChar = int('!');
                for (int i = 0; i < quality.length(); i++) { 
                        int temp = int(quality[i]);
                        temp -= controlChar;
                        
                        qualScores.push_back(temp);
                }
-
+    
         read.name = name;
         read.sequence = sequence;
         read.scores = qualScores;
@@ -725,34 +1122,22 @@ bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
     try {
         bool good = true;
         
-        //fix names
-        if ((forward.name.length() > 2) && (reverse.name.length() > 2)) {
-            forward.name = forward.name.substr(0, forward.name.length()-2);
-            reverse.name = reverse.name.substr(0, reverse.name.length()-2);
-        }else { good = false; m->control_pressed = true; }
-        
-        //do names match
-        if (forward.name != reverse.name) {
-            m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n");
-            good = false; m->control_pressed = true;
-        }
-        
         //do sequence lengths match
         if (forward.sequence.length() != reverse.sequence.length()) {
-            m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n");
-            good = false; m->control_pressed = true;
+            m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ", ignoring.\n");
+            good = false; 
         }
         
         //do number of qual scores match 
         if (forward.scores.size() != reverse.scores.size()) {
-            m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n");
-            good = false; m->control_pressed = true;
+            m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ", ignoring.\n");
+            good = false; 
         }
 
         return good;
     }
     catch(exception& e) {
-        m->errorOut(e, "MakeContigsCommand", "readFastq");
+        m->errorOut(e, "MakeContigsCommand", "checkReads");
         exit(1);
     }
 }
@@ -799,7 +1184,7 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                                        if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
                                }
                                
-                               if(type == "PRIMER"){
+                               if(type == "FORWARD"){
                                        m->gobble(in);
                                        
                     in >> roligo;
@@ -808,7 +1193,7 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                         roligo[i] = toupper(roligo[i]);
                         if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
                     }
-                    roligo = reverseOligo(roligo);
+                    //roligo = reverseOligo(roligo);
                     
                     group = "";
                     
@@ -840,7 +1225,7 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                         roligo[i] = toupper(roligo[i]);
                         if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
                     }
-                    roligo = reverseOligo(roligo);
+                    //roligo = reverseOligo(roligo);
                     
                     oligosPair newPair(foligo, roligo);
                     
@@ -863,8 +1248,10 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                                        barcodeNameVector.push_back(group);
                                }else if(type == "LINKER"){
                                        linker.push_back(foligo);
+                    m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
                                }else if(type == "SPACER"){
                                        spacer.push_back(foligo);
+                    m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
                                }
                                else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
                        }
index 84e43c01c9a0b8c7775c6a46494a6fc0fcb642a6..86a8450afaf76ba3be6a167ce8b856a605f09166 100644 (file)
@@ -29,6 +29,14 @@ struct fastqRead {
        ~fastqRead() {};
 };
 
+struct pairFastqRead {
+       fastqRead forward;
+    fastqRead reverse;
+       
+       pairFastqRead() {};
+       pairFastqRead(fastqRead f, fastqRead r) : forward(f), reverse(r){};
+       ~pairFastqRead() {};
+};
 /**************************************************************************************************/
 
 class MakeContigsCommand : public Command {
@@ -50,7 +58,7 @@ public:
     void help() { m->mothurOut(getHelpString()); }     
     
 private:
-    bool abort, allFiles;
+    bool abort, allFiles, createGroup;
     string outputDir, ffastqfile, rfastqfile, align, oligosfile;
        float match, misMatch, gapOpen, gapExtend;
        int processors, longestBase, threshold, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
@@ -63,18 +71,20 @@ private:
        vector<string> primerNameVector;        
        vector<string> barcodeNameVector;       
     
-       map<string, int> groupCounts;  
+       map<string, int> groupCounts; 
+    map<string, string> groupMap;
     //map<string, int> combos;
        //map<string, int> groupToIndex;
     //vector<string> groupVector;
     
-    fastqRead readFastq(ifstream&);
+    fastqRead readFastq(ifstream&, bool&);
     vector< vector<string> > readFastqFiles(int&);
     bool checkReads(fastqRead&, fastqRead&);
-    int createProcesses(vector< vector<string> >, string, string, string);
-    int driver(vector<string>, string, string, string);
+    int createProcesses(vector< vector<string> >, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >);
+    int driver(vector<string>, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >);
     bool getOligos(vector<vector<string> >&, vector<vector<string> >&);
     string reverseOligo(string);
+    vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques);
 };
 
 /**************************************************************************************************/
@@ -86,15 +96,26 @@ private:
 struct contigsData {
        string outputFasta; 
        string outputQual; 
+    string outputScrapFasta; 
+       string outputScrapQual;
        string outputMisMatches;
        string align;
     vector<string> files;
+    vector<vector<string> > fastaFileNames;
+    vector<vector<string> > qualFileNames;
        MothurOut* m;
        float match, misMatch, gapOpen, gapExtend;
-       int count, threshold, threadID;
+       int count, threshold, threadID, pdiffs, bdiffs, tdiffs;
+    bool allFiles, createGroup;
+    map<string, int> groupCounts; 
+    map<string, string> groupMap;
+    vector<string> primerNameVector;   
+       vector<string> barcodeNameVector;
+    map<int, oligosPair> barcodes;
+       map<int, oligosPair> primers;
        
        contigsData(){}
-       contigsData(vector<string> f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int tid) {
+       contigsData(vector<string> f, string of, string oq, string osf, string osq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, int tid) {
         files = f;
                outputFasta = of;
         outputQual = oq;
@@ -107,6 +128,19 @@ struct contigsData {
         threshold = thr;
                align = al;
                count = 0;
+        outputScrapFasta = osf;
+        outputScrapQual = osq;
+        fastaFileNames = ffn;
+        qualFileNames = qfn;
+        barcodes = br;
+        primers = pr;
+        barcodeNameVector = bnv;
+        primerNameVector = pnv;
+        pdiffs = pdf;
+        bdiffs = bdf;
+        tdiffs = tdf;
+        allFiles = all;
+        createGroup = cg;
                threadID = tid;
        }
 };
@@ -132,32 +166,69 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
         
         if (pDataArray->m->debug) {  pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
         
+               if(pDataArray->allFiles){
+                       for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
+                               for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
+                                       if (pDataArray->fastaFileNames[i][j] != "") {
+                                               ofstream temp;
+                                               pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
+                        pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                  temp.close();
+                                       }
+                               }
+                       }
+               }
+        
         ifstream inFFasta, inRFasta, inFQual, inRQual;
         pDataArray->m->openInputFile(thisffastafile, inFFasta);
         pDataArray->m->openInputFile(thisfqualfile, inFQual);
         pDataArray->m->openInputFile(thisrfastafile, inRFasta);
         pDataArray->m->openInputFile(thisrqualfile, inRQual);
         
-        ofstream outFasta, outQual, outMisMatch;
+        ofstream outFasta, outQual, outMisMatch, outScrapFasta, outScrapQual;
         pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
         pDataArray->m->openOutputFile(pDataArray->outputQual, outQual);
         pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
+        pDataArray->m->openOutputFile(pDataArray->outputScrapFasta, outScrapFasta);
+        pDataArray->m->openOutputFile(pDataArray->outputScrapQual, outScrapQual);
         outMisMatch << "Name\tLength\tMisMatches\n";
         
+        TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes);
+        
         while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
             
             if (pDataArray->m->control_pressed) { break; }
             
+            int success = 1;
+            string trashCode = "";
+            int currentSeqsDiffs = 0;
+            
             //read seqs and quality info
             Sequence fSeq(inFFasta); pDataArray->m->gobble(inFFasta);
             Sequence rSeq(inRFasta); pDataArray->m->gobble(inRFasta);
             QualityScores fQual(inFQual); pDataArray->m->gobble(inFQual);
             QualityScores rQual(inRQual); pDataArray->m->gobble(inRQual);
             
+            int barcodeIndex = 0;
+            int primerIndex = 0;
+            
+            if(pDataArray->barcodes.size() != 0){
+                success = trimOligos.stripBarcode(fSeq, rSeq, fQual, rQual, barcodeIndex);
+                if(success > pDataArray->bdiffs)               {       trashCode += 'b';       }
+                else{ currentSeqsDiffs += success;  }
+            }
+            
+            if(pDataArray->primers.size() != 0){
+                success = trimOligos.stripForward(fSeq, rSeq, fQual, rQual, primerIndex);
+                if(success > pDataArray->pdiffs)               {       trashCode += 'f';       }
+                else{ currentSeqsDiffs += success;  }
+            }
+            
+            if (currentSeqsDiffs > pDataArray->tdiffs) {       trashCode += 't';   }
+            
             //flip the reverse reads
             rSeq.reverseComplement();
             rQual.flipQScores();
-            
+           
             //pairwise align
             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
@@ -172,29 +243,51 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
             int numMismatches = 0;
             string seq1 = fSeq.getAligned();
             string seq2 = rSeq.getAligned();
-            
             vector<int> scores1 = fQual.getQualityScores();
             vector<int> scores2 = rQual.getQualityScores();
             
-            for (int i = 0; i < length; i++) {
+            int overlapStart = fSeq.getStartPos();
+            int seq2Start = rSeq.getStartPos();
+            //bigger of the 2 starting positions is the location of the overlapping start
+            if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
+                overlapStart = seq2Start; 
+                for (int i = 0; i < overlapStart; i++) {
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                }
+            }else { //seq1 starts later so take from 0 to overlapStart from seq2
+                for (int i = 0; i < overlapStart; i++) {
+                    contig += seq2[i];
+                    contigScores.push_back(scores2[BBaseMap[i]]);
+                }
+            }
+            
+            int seq1End = fSeq.getEndPos();
+            int seq2End = rSeq.getEndPos();
+            int overlapEnd = seq1End;
+            if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
+            
+            for (int i = overlapStart; i < overlapEnd; i++) {
                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
                     contig += seq1[i];
                     contigScores.push_back(scores1[ABaseMap[i]]);
-                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
-                    if (scores2[BBaseMap[i]] >= pDataArray->threshold) {
+                    if (scores2[BBaseMap[i]] < pDataArray->threshold) { } //
+                    else {
                         contig += seq2[i];
                         contigScores.push_back(scores2[BBaseMap[i]]);
                     }
                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
-                    if (scores1[ABaseMap[i]] >= pDataArray->threshold) { 
+                    if (scores1[ABaseMap[i]] < pDataArray->threshold) { } //
+                    else {
                         contig += seq1[i];
                         contigScores.push_back(scores1[ABaseMap[i]]);
                     }
                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
                     char c = seq1[i];
                     contigScores.push_back(scores1[ABaseMap[i]]);
-                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; }
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
                     contig += c;
                     numMismatches++;
                 }else { //should never get here
@@ -202,13 +295,70 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                 }
             }
             
-            //output
-            outFasta << ">" << fSeq.getName() << endl << contig << endl;
-            outQual << ">" << fSeq.getName() << endl;
-            for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
-            outQual << endl;
-            outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
-            
+            if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
+                for (int i = overlapEnd; i < length; i++) {
+                    contig += seq2[i];
+                    contigScores.push_back(scores2[BBaseMap[i]]);
+                }
+            }else { //seq2 ends before seq1 so take from overlap to length from seq1
+                for (int i = overlapEnd; i < length; i++) {
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                }
+                
+            }
+
+            if(trashCode.length() == 0){
+                if (pDataArray->createGroup) {
+                    if(pDataArray->barcodes.size() != 0){
+                        string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
+                        if (pDataArray->primers.size() != 0) { 
+                            if (pDataArray->primerNameVector[primerIndex] != "") { 
+                                if(thisGroup != "") {
+                                    thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
+                                }else {
+                                    thisGroup = pDataArray->primerNameVector[primerIndex]; 
+                                }
+                            } 
+                        }
+                        
+                        if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); }
+                        
+                        pDataArray->groupMap[fSeq.getName()] = thisGroup; 
+                        
+                        map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
+                        if (it == pDataArray->groupCounts.end()) {     pDataArray->groupCounts[thisGroup] = 1; }
+                        else { pDataArray->groupCounts[it->first] ++; }
+                        
+                    }
+                }
+                
+                if(pDataArray->allFiles){
+                    ofstream output;
+                    pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
+                    output << ">" << fSeq.getName() << endl << contig << endl;
+                    output.close();
+                    
+                    pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
+                    output << ">" << fSeq.getName() << endl;
+                    for (int i = 0; i < contigScores.size(); i++) { output << contigScores[i] << ' '; }
+                    output << endl;
+                    output.close();                                                    
+                }
+                
+                //output
+                outFasta << ">" << fSeq.getName() << endl << contig << endl;
+                outQual << ">" << fSeq.getName() << endl;
+                for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+                outQual << endl;
+                outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+            }else {
+                //output
+                outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
+                outScrapQual << ">" << fSeq.getName() << " | " << trashCode << endl;
+                for (int i = 0; i < contigScores.size(); i++) { outScrapQual << contigScores[i] << ' '; }
+                outScrapQual << endl;
+            }
             num++;
             
                        //report progress
@@ -225,9 +375,11 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
         outFasta.close();
         outQual.close();
         outMisMatch.close();
+        outScrapFasta.close();
+        outScrapQual.close();
         delete alignment;
         
-        if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputFasta);  pDataArray->m->mothurRemove(pDataArray->outputMisMatches);}
+        if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputFasta);  pDataArray->m->mothurRemove(pDataArray->outputMisMatches); pDataArray->m->mothurRemove(pDataArray->outputScrapQual); pDataArray->m->mothurRemove(pDataArray->outputScrapFasta);}
         
         return 0;
                
index 7d40e80ead727205f4bcc00269f78bd7f81057ca..37c0916d4c49e1f8ed8ca979c7ff61b7e1a13ef3 100644 (file)
@@ -1541,6 +1541,46 @@ vector<string> MothurOut::splitWhiteSpace(string input){
                exit(1);
        }
 }
+/***********************************************************************/
+vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
+       try {
+        vector<string> pieces;
+        string rest = "";
+        
+        int pos = input.find('\'');
+        int pos2 = input.find('\"');
+        
+        if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
+        else {
+            for (int i = 0; i < input.length(); i++) {
+                if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
+                    rest += input[i];
+                    for (int j = i+1; j < input.length(); j++) {
+                        if ((input[j] == '\'') || (input[j] == '\"')) {  //then quit
+                            rest += input[j];
+                            i = j+1;
+                            j+=input.length();
+                        }else { rest += input[j]; }
+                    }
+                }else if (!isspace(input[i]))  { rest += input[i];  }
+                else {
+                    if (rest != "") { pieces.push_back(rest);  rest = ""; }
+                    while (i < input.length()) {  //gobble white space
+                        if (isspace(input[i])) { i++; }
+                        else { rest = input[i];  break; } //cout << "next piece buffer = " << nextPiece << endl;
+                    } 
+                }
+            }
+            
+            if (rest != "") { pieces.push_back(rest); }
+        }
+        return pieces;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "splitWhiteSpace");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
        try {
@@ -2688,6 +2728,35 @@ bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
                exit(1);
        }       
 }
+/**************************************************************************************************/
+
+bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
+       try {
+               for (int i = 0; i < sets.size(); i++) {
+                       if (set == sets[i]) { return true; }
+               }
+               return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "inUsersGroups");
+               exit(1);
+       }       
+}
+/**************************************************************************************************/
+
+bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
+       try {
+               for (int i = 0; i < Groups.size(); i++) {
+                       if (groupname == Groups[i]) { return true; }
+               }
+               return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "inUsersGroups");
+               exit(1);
+       }       
+}
+
 /**************************************************************************************************/
 //returns true if any of the strings in first vector are in second vector
 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
index 53d4250c771cdbdac15c715c44d97fabf0a92c1a..0d2e86f24cb258b24e78367e1427fc7ee2bc7d31 100644 (file)
@@ -120,7 +120,9 @@ class MothurOut {
                bool checkReleaseVersion(ifstream&, string);
                bool anyLabelsToProcess(string, set<string>&, string);
                bool inUsersGroups(vector<string>, vector<string>);
+        bool inUsersGroups(vector<int>, vector< vector<int> >);
                bool inUsersGroups(string, vector<string>);
+        bool inUsersGroups(int, vector<int>);
                void getNumSeqs(ifstream&, int&);
                int getNumSeqs(ifstream&);
                int getNumNames(string);
@@ -139,6 +141,7 @@ class MothurOut {
                void splitAtDash(string&, vector<string>&);
                void splitAtChar(string&, vector<string>&, char);
         void splitAtChar(string&, string&, char);
+        vector<string> splitWhiteSpaceWithQuotes(string);
                int removeConfidences(string&);
         string removeQuotes(string);
         string makeList(vector<string>&);
index 06f83a4f1a390536a9952d10d413ec22613b89cd..968d76744be5b296a9b9f98a5294356366783bce 100644 (file)
@@ -16,6 +16,7 @@ vector<string> OTUAssociationCommand::setParameters(){
                CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
                CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
         CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
+        CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
@@ -37,9 +38,10 @@ string OTUAssociationCommand::getHelpString(){
                string helpString = "";
                helpString += "The otu.association command reads a shared or relabund file and calculates the correlation coefficients between otus.\n";
         helpString += "If you provide a metadata file, mothur will calculate te correlation bewteen the metadata and the otus.\n";
-               helpString += "The otu.association command parameters are shared, relabund, metadata, groups, method and label.  The shared or relabund parameter is required.\n";
+               helpString += "The otu.association command parameters are shared, relabund, metadata, groups, method, cutoff and label.  The shared or relabund parameter is required.\n";
                helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
                helpString += "The label parameter allows you to select what distances level you would like used, and are also separated by dashes.\n";
+        helpString += "The cutoff parameter allows you to set a pvalue at which the otu will be reported.\n";
                helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
                helpString += "The otu.association command should be in the following format: otu.association(shared=yourSharedFile, method=yourMethod).\n";
                helpString += "Example otu.association(shared=genus.pool.shared, method=kendall).\n";
@@ -195,6 +197,10 @@ OTUAssociationCommand::OTUAssociationCommand(string option)  {
                        
                        method = validParameter.validFile(parameters, "method", false);         if (method == "not found"){     method = "pearson";             }
                        
+            string temp = validParameter.validFile(parameters, "cutoff", false);
+                       if (temp == "not found") { temp = "10"; }
+                       m->mothurConvert(temp, cutoff); 
+            
                        if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; }
                        
                }
@@ -356,7 +362,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
+                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
                }else { //compare otus to metadata
@@ -373,7 +379,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], metadata[k], sig);     }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
+                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
 
@@ -515,7 +521,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
+                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
                }else { //compare otus to metadata
@@ -532,7 +538,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], metadata[k], sig);     }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
+                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
             
index 64995ac38e146ff6994ab17f624e329fd848cd57..7f7664be5eda5568f17723e8ccede599c1169f8b 100644 (file)
@@ -36,6 +36,7 @@ private:
        
        string sharedfile, relabundfile, metadatafile, groups, label, inputFileName, outputDir, method;
        bool abort, pickedGroups, allLines;
+    double cutoff;
        set<string> labels;
     vector<SharedRAbundFloatVector*> metadataLookup;
     vector< vector< double> > metadata;
index 38d3bdf929f4ffc110f76ab0a55b12d66ef4ad08..eddddfa0534f547cfa1802b75211f1c6c9950f65 100644 (file)
@@ -272,11 +272,14 @@ int PhylotypeCommand::execute(){
                                 map<string, string>::iterator itNames = namemap.find(names[i]);  //make sure this name is in namefile
                                 
                                 if (itNames != namemap.end()) {  name += namemap[names[i]] + ",";   } //you found it in namefile
-                                else { m->mothurOut(names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); exit(1);  }
+                                else { m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
                                 
                             }else{   name += names[i] + ",";   }
                         }
                                        }
+                    
+                    if (m->control_pressed) { break; }
+                    
                                        name = name.substr(0, name.length()-1);  //rip off extra ','
                                        //add bin to list vector
                                        if (name != "") { list.push_back(name); } //caused by unknown
index a9bb5737eac832a203969460f7f4e2759849a975..a2500949a9c207a5975b5734050533c1d1ea317f 100644 (file)
@@ -450,7 +450,7 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                        if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
                                        
                        Sequence current(in); m->gobble(in);
-       
+           
                        if (current.getName() != "") {
                                
                                int num = 1;
index a359607a3ea11fa118efed129407ee608d4903e5..96662bc36d4b77e1cb8b7a884f4e8460969033f4 100644 (file)
@@ -300,7 +300,7 @@ string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
                        if(letter == '>'){
                                fastaFile.putback(letter);
                                break;
-                       }
+                       }else if (letter == ' ') {;}
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
@@ -354,7 +354,7 @@ string Sequence::getSequenceString(istringstream& fastaFile, int& numAmbig) {
                        if(letter == '>'){
                                fastaFile.putback(letter);
                                break;
-                       }
+                       }else if (letter == ' ') {;}
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
index b8b91028625cec0cfd1ef59d7dc82ab67b40cde5..fa32fc43a873d22482cfcb8e4172343f3516cbce 100644 (file)
@@ -152,8 +152,6 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma
                
                m->saveNextLabel = nextLabel;
                m->setAllGroups(allGroups);
-        for (int i = 0; i < allGroups.size(); i++) { cout << allGroups[i] << endl; }
-       
        }
        catch(exception& e) {
                m->errorOut(e, "SharedRAbundFloatVector", "SharedRAbundFloatVector");
index 392f97bd51f1d2ac67d6d0a810a408ab507fdcee..a6b1b2d58da2daa5cc53d17d4bbb814b56921ffd 100644 (file)
@@ -24,7 +24,7 @@ Tree* SubSample::getSample(Tree* T, CountTable* ct, CountTable* newCt, int size)
         for (int i = 0; i < Groups.size(); i++) {
             if (m->inUsersGroups(Groups[i], m->getGroups())) {
                 if (m->control_pressed) { break; }
-        
+                cout << Groups[i] << endl;
                 int thisSize = ct->getGroupCount(Groups[i]);
                 
                 if (thisSize >= size) {        
index 8f4cbe98a7488b9d552f07b6ffd17aeb5e858c23..c35971a676f1c9af3de404bb5f0a4ca707a82f1f 100644 (file)
@@ -28,6 +28,33 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
                revPrimer = r;
         linker = lk;
         spacer = sp;
+        maxFBarcodeLength = 0;
+        for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
+            if(it->first.length() > maxFBarcodeLength){
+                maxFBarcodeLength = it->first.length();
+            }
+        }
+        maxFPrimerLength = 0;
+        map<string,int>::iterator it; 
+        for(it=primers.begin();it!=primers.end();it++){
+            if(it->first.length() > maxFPrimerLength){
+                maxFPrimerLength = it->first.length();
+            }
+        }
+        
+        maxLinkerLength = 0;
+        for(int i = 0; i < linker.size(); i++){
+            if(linker[i].length() > maxLinkerLength){
+                maxLinkerLength = linker[i].length();
+            }
+        }
+        
+        maxSpacerLength = 0;
+        for(int i = 0; i < spacer.size(); i++){
+            if(spacer[i].length() > maxSpacerLength){
+                maxSpacerLength = spacer[i].length();
+            }
+        }
        }
        catch(exception& e) {
                m->errorOut(e, "TrimOligos", "TrimOligos");
@@ -36,7 +63,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
 }
 /********************************************************************/
 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
-TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br, vector<string> lk, vector<string> sp){
+TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
        try {
                m = MothurOut::getInstance();
                
@@ -45,10 +72,60 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<
         ldiffs = l;
         sdiffs = s;
                
-               ibarcodes = br;
-               iprimers = pr;
-        linker = lk;
-        spacer = sp;
+        maxFBarcodeLength = 0;
+        for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
+            string forward = it->second.forward;
+            map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
+            
+            if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length();  }
+            
+            if (itForward == ifbarcodes.end()) {
+                vector<int> temp; temp.push_back(it->first);
+                ifbarcodes[forward] = temp;
+            }else { itForward->second.push_back(it->first); }
+        }
+
+        maxFPrimerLength = 0;
+        for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
+            string forward = it->second.forward;
+            map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
+            
+            if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length();  }
+            
+            if (itForward == ifprimers.end()) {
+                vector<int> temp; temp.push_back(it->first);
+                ifprimers[forward] = temp;
+            }else { itForward->second.push_back(it->first); }
+        }
+        
+        maxRBarcodeLength = 0;
+        for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
+            string forward = it->second.reverse;
+            map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
+            
+            if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length();  }
+            
+            if (itForward == irbarcodes.end()) {
+                vector<int> temp; temp.push_back(it->first);
+                irbarcodes[forward] = temp;
+            }else { itForward->second.push_back(it->first); }
+        }
+        
+        maxRPrimerLength = 0;
+        for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
+            string forward = it->second.reverse;
+            map<string, vector<int> >::iterator itForward = irprimers.find(forward);
+            
+            if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length();  }
+            
+            if (itForward == irprimers.end()) {
+                vector<int> temp; temp.push_back(it->first);
+                irprimers[forward] = temp;
+            }else { itForward->second.push_back(it->first); }
+        }
+
+        ipbarcodes = br;
+        ipprimers = pr;
        }
        catch(exception& e) {
                m->errorOut(e, "TrimOligos", "TrimOligos");
@@ -107,21 +184,9 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                if ((bdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (barcodes.size() > 0) {
-                               map<string,int>::iterator it; 
-                               
-                               for(it=barcodes.begin();it!=barcodes.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
+                       else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -133,7 +198,7 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                                string oligo = it->first;
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
+                               if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
                                        success = bdiffs + 10;
                                        break;
                                }
@@ -202,7 +267,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                int success = bdiffs + 1;       //guilty until proven innocent
                
                //can you find the forward barcode
-               for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
+               for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
                        string foligo = it->second.forward;
             string roligo = it->second.reverse;
             
@@ -229,37 +294,45 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                //if you found the barcode or if you don't want to allow for diffs
                if ((bdiffs == 0) || (success == 0)) { return success;  }
                else { //try aligning and see if you can find it
-                       
-            //look for forward
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (ibarcodes.size() > 0) {
-                               for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
-                                       if(it->second.forward.length() > maxLength){ maxLength = it->second.forward.length();  }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-                       }else{ alignment = NULL; } 
+                       if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
+                       else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
                        int minCount = 1;
-                       int minFGroup = -1;
-                       int minFPos = 0;
-                       
-                       for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
-                               string oligo = it->second.forward;
+                       vector< vector<int> > minFGroup;
+                       vector<int> minFPos; 
+            
+            //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+            /*
+                1   Sarah   Westcott
+                2   John    Westcott
+                3   Anna    Westcott
+                4   Sarah   Schloss
+                5   Pat     Schloss
+                6   Gail    Brown
+                7   Pat     Moore
+             
+                only want to look for forward = Sarah, John, Anna, Pat, Gail
+                                      reverse = Westcott, Schloss, Brown, Moore
+             
+                but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
+             */
+            //cout << endl << forwardSeq.getName() << endl;         
+                       for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
+                               string oligo = it->first;
                                
-                               if(rawFSequence.length() < maxLength){  //let's just assume that the barcodes are the same length
+                               if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
                                        success = bdiffs + 10;
                                        break;
                                }
-                               
+                               //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
                                alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
-                               
+               
                                int alnLength = oligo.length();
                                
                                for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
@@ -267,87 +340,136 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                                temp = temp.substr(0,alnLength);
                 int numDiff = countDiffs(oligo, temp);
                                
+                if (alnLength == 0) { numDiff = bdiffs + 100; }
+                //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                
                                if(numDiff < minDiff){
                                        minDiff = numDiff;
                                        minCount = 1;
-                                       minFGroup = it->first;
-                                       minFPos = 0;
+                                       minFGroup.clear();
+                    minFGroup.push_back(it->second);
+                                       int tempminFPos = 0;
+                    minFPos.clear();
                                        for(int i=0;i<alnLength;i++){
                                                if(temp[i] != '-'){
-                                                       minFPos++;
+                                                       tempminFPos++;
                                                }
                                        }
+                    minFPos.push_back(tempminFPos);
                                }else if(numDiff == minDiff){
-                                       minCount++;
+                                       minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       tempminFPos++;
+                                               }
+                                       }
+                    minFPos.push_back(tempminFPos);
                                }
-                               
                        }
-                       
+            
+                       //cout << minDiff << '\t' << minCount << '\t' << endl;
                        if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
                        else{   
                 //check for reverse match
                 if (alignment != NULL) {  delete alignment;  }
-                maxLength = 0;
                 
-                if (ibarcodes.size() > 0) {
-                    for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
-                        if(it->second.reverse.length() > maxLength){ maxLength = it->second.reverse.length();  }
-                    }
-                    alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-                }else{ alignment = NULL; } 
+                if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
+                else{ alignment = NULL; } 
                 
                 //can you find the barcode
                 minDiff = 1e6;
                 minCount = 1;
-                int minRGroup = -1;
-                int minRPos = 0;
+                vector< vector<int> > minRGroup;
+                vector<int> minRPos;
                 
-                for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
-                    string oligo = it->second.reverse;
-                    
-                    if(rawRSequence.length() < maxLength){     //let's just assume that the barcodes are the same length
+                for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
+                    string oligo = it->first;
+                    //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
+                    if(rawRSequence.length() < maxRBarcodeLength){     //let's just assume that the barcodes are the same length
                         success = bdiffs + 10;
                         break;
                     }
                     
                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                    alignment->align(oligo, rawRSequence.substr((rawRSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
+                    alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
                     oligo = alignment->getSeqAAln();
                     string temp = alignment->getSeqBAln();
                     
                     int alnLength = oligo.length();
-                    for(int i=0;i<alnLength;i++){ if(oligo[i] != '-'){ alnLength = i;  break;  } }
+                    for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1;        break;  } }
                     oligo = oligo.substr(0,alnLength);
                     temp = temp.substr(0,alnLength);
                     int numDiff = countDiffs(oligo, temp);
+                    if (alnLength == 0) { numDiff = bdiffs + 100; }
                     
+                    //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
                     if(numDiff < minDiff){
                         minDiff = numDiff;
                         minCount = 1;
-                        minRGroup = it->first;
-                        minRPos = 0;
+                        minRGroup.clear();
+                        minRGroup.push_back(it->second);
+                        int tempminRPos = 0;
+                        minRPos.clear();
                         for(int i=0;i<alnLength;i++){
                             if(temp[i] != '-'){
-                                minRPos++;
+                                tempminRPos++;
                             }
                         }
+                        minRPos.push_back(tempminRPos);                    
                     }else if(numDiff == minDiff){
-                        minCount++;
+                        int tempminRPos = 0;
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);  
+                        minRGroup.push_back(it->second);
                     }
                     
                 }
-
+                
+                /*cout << minDiff << '\t' << minCount << '\t' << endl;
+                for (int i = 0; i < minFGroup.size(); i++) { 
+                    cout << i << '\t';
+                    for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
+                    cout << endl;
+                }
+                cout << endl;
+                for (int i = 0; i < minRGroup.size(); i++) { 
+                    cout << i << '\t';
+                    for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
+                    cout << endl;
+                }
+                cout << endl;*/
                 if(minDiff > bdiffs)   {       success = minDiff;              }       //no good matches
-                else if(minCount > 1)  {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
-                else{
+                else  {  
+                    bool foundMatch = false;
+                    vector<int> matches;
+                    for (int i = 0; i < minFGroup.size(); i++) {
+                        for (int j = 0; j < minFGroup[i].size(); j++) {
+                            for (int k = 0; k < minRGroup.size(); k++) {
+                                if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+                            }
+                        }
+                    }
+                    
+                    int fStart = 0;
+                    int rStart = 0;
+                    if (matches.size() == 1) { 
+                        foundMatch = true;
+                        group = matches[0]; 
+                        fStart = minFPos[0];
+                        rStart = minRPos[0];
+                    }
+                    
                     //we have an acceptable match for the forward and reverse, but do they match?
-                    if (minFGroup == minRGroup) {
-                        group = minFGroup;
-                        forwardSeq.setUnaligned(rawFSequence.substr(minFPos));
-                        reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-minRPos)));
-                        forwardQual.trimQScores(minFPos, -1);
-                        reverseQual.trimQScores(-1, rawRSequence.length()-minRPos);
+                    if (foundMatch) {
+                        forwardSeq.setUnaligned(rawFSequence.substr(fStart));
+                        reverseSeq.setUnaligned(rawRSequence.substr(rStart));
+                        forwardQual.trimQScores(fStart, -1);
+                        reverseQual.trimQScores(rStart, -1);
                         success = minDiff;
                     }else { success = bdiffs + 100;    }
                 }
@@ -366,255 +488,252 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
        
 }
 //*******************************************************************/
-int TrimOligos::stripBarcode(Sequence& seq, int& group){
+int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
        try {
+               //look for forward barcode
+               string rawFSequence = forwardSeq.getUnaligned();
+        string rawRSequence = reverseSeq.getUnaligned();
+               int success = pdiffs + 1;       //guilty until proven innocent
                
-               string rawSequence = seq.getUnaligned();
-               int success = bdiffs + 1;       //guilty until proven innocent
-               
-               //can you find the barcode
-               for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
-                       string oligo = it->first;
-                       if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
-                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+               //can you find the forward barcode
+               for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
+                       string foligo = it->second.forward;
+            string roligo = it->second.reverse;
+            
+                       if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
+                               success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                               break;  
+                       }
+            if(rawRSequence.length() < roligo.length()){       //let's just assume that the barcodes are the same length
+                               success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
                                break;  
                        }
                        
-                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
-                               group = it->second;
-                               seq.setUnaligned(rawSequence.substr(oligo.length()));
-                               
+                       if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+                               group = it->first;
+                               forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
+                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                forwardQual.trimQScores(foligo.length(), -1);
+                reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
                                success = 0;
                                break;
                        }
                }
                
                //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
-               
+               if ((pdiffs == 0) || (success == 0)) { return success;  }
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (barcodes.size() > 0) {
-                               map<string,int>::iterator it=barcodes.begin();
-                               
-                               for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
+                       else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
                        int minCount = 1;
-                       int minGroup = -1;
-                       int minPos = 0;
-                       
-                       for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
+                       vector< vector<int> > minFGroup;
+                       vector<int> minFPos; 
+            
+            //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+            /*
+             1   Sarah   Westcott
+             2   John    Westcott
+             3   Anna    Westcott
+             4   Sarah   Schloss
+             5   Pat     Schloss
+             6   Gail    Brown
+             7   Pat     Moore
+             
+             only want to look for forward = Sarah, John, Anna, Pat, Gail
+             reverse = Westcott, Schloss, Brown, Moore
+             
+             but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
+             */
+            //cout << endl << forwardSeq.getName() << endl;         
+                       for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
                                string oligo = it->first;
-                               //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
-                                       success = bdiffs + 10;
+                               if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
+                                       success = pdiffs + 10;
                                        break;
                                }
-                               
+                               //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
+                               alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
-                                
+                
                                int alnLength = oligo.length();
                                
-                               for(int i=oligo.length()-1;i>=0;i--){
-                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
-                               }
+                               for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
                                oligo = oligo.substr(0,alnLength);
                                temp = temp.substr(0,alnLength);
-                                
-                               int numDiff = countDiffs(oligo, temp);
+                int numDiff = countDiffs(oligo, temp);
                                
+                if (alnLength == 0) { numDiff = pdiffs + 100; }
+                //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                
                                if(numDiff < minDiff){
                                        minDiff = numDiff;
                                        minCount = 1;
-                                       minGroup = it->second;
-                                       minPos = 0;
+                                       minFGroup.clear();
+                    minFGroup.push_back(it->second);
+                                       int tempminFPos = 0;
+                    minFPos.clear();
                                        for(int i=0;i<alnLength;i++){
                                                if(temp[i] != '-'){
-                                                       minPos++;
+                                                       tempminFPos++;
                                                }
                                        }
-                               }
-                               else if(numDiff == minDiff){
-                                       minCount++;
-                               }
-                               
-                       }
-                       
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
-                       else{                                                                                                   //use the best match
-                               group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(minPos));
-                               success = minDiff;
-                       }
-                       
-                       if (alignment != NULL) {  delete alignment;  }
-                       
-               }
-               
-               return success;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimOligos", "stripBarcode");
-               exit(1);
-       }
-       
-}
-/*******************************************************************
-int TrimOligos::stripRBarcode(Sequence& seq, QualityScores& qual, int& group){
-       try {
-               
-               string rawSequence = seq.getUnaligned();
-               int success = bdiffs + 1;       //guilty until proven innocent
-               
-               //can you find the barcode
-               for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
-                       string oligo = it->first;
-                       if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
-                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
-                               break;  
-                       }
-            
-                       if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
-                               group = it->second;
-                               seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
-                               
-                               if(qual.getName() != ""){
-                                       qual.trimQScores(-1, rawSequence.length()-oligo.length());
-                               }
-                               
-                               success = 0;
-                               break;
-                       }
-               }
-               
-               //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
-               
-               else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
-                       Alignment* alignment;
-                       if (rbarcodes.size() > 0) {
-                               map<string,int>::iterator it; 
-                               
-                               for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
-                       
-                       //can you find the barcode
-                       int minDiff = 1e6;
-                       int minCount = 1;
-                       int minGroup = -1;
-                       int minPos = 0;
-                       
-                       for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
-                               string oligo = it->first;
-                               //                              int length = oligo.length();
-                               
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
-                                       success = bdiffs + 10;
-                                       break;
-                               }
-                               
-                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
-                               oligo = alignment->getSeqAAln();
-                               string temp = alignment->getSeqBAln();
-     
-                               int alnLength = oligo.length();
-                               
-                               for(int i=0;i<alnLength;i++){
-                                       if(oligo[i] != '-'){    alnLength = i;  break;  }
-                               }
-                               oligo = oligo.substr(alnLength);
-                               temp = temp.substr(alnLength);
-                               
-                               int numDiff = countDiffs(oligo, temp);
-                               
-                               if(numDiff < minDiff){
-                                       minDiff = numDiff;
-                                       minCount = 1;
-                                       minGroup = it->second;
-                                       minPos = 0;
-                                       for(int i=alnLength-1;i>=0;i--){
+                    minFPos.push_back(tempminFPos);
+                               }else if(numDiff == minDiff){
+                                       minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                                       for(int i=0;i<alnLength;i++){
                                                if(temp[i] != '-'){
-                                                       minPos++;
+                                                       tempminFPos++;
                                                }
                                        }
+                    minFPos.push_back(tempminFPos);
                                }
-                               else if(numDiff == minDiff){
-                                       minCount++;
-                               }
-                               
                        }
-                       
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
-                       else{                                                                                                   //use the best match
-                               group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
+            
+                       //cout << minDiff << '\t' << minCount << '\t' << endl;
+                       if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
+                       else{   
+                //check for reverse match
+                if (alignment != NULL) {  delete alignment;  }
                 
-                               if(qual.getName() != ""){
-                                       qual.trimQScores(-1, (rawSequence.length()-minPos));
-                               }
-                               success = minDiff;
+                if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
+                else{ alignment = NULL; } 
+                
+                //can you find the barcode
+                minDiff = 1e6;
+                minCount = 1;
+                vector< vector<int> > minRGroup;
+                vector<int> minRPos;
+                
+                for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
+                    string oligo = it->first;
+                    //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
+                    if(rawRSequence.length() < maxRPrimerLength){      //let's just assume that the barcodes are the same length
+                        success = pdiffs + 10;
+                        break;
+                    }
+                    
+                    //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                    alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+                    oligo = alignment->getSeqAAln();
+                    string temp = alignment->getSeqBAln();
+                    
+                    int alnLength = oligo.length();
+                    for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1;        break;  } }
+                    oligo = oligo.substr(0,alnLength);
+                    temp = temp.substr(0,alnLength);
+                    int numDiff = countDiffs(oligo, temp);
+                    if (alnLength == 0) { numDiff = pdiffs + 100; }
+                    
+                    //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                    if(numDiff < minDiff){
+                        minDiff = numDiff;
+                        minCount = 1;
+                        minRGroup.clear();
+                        minRGroup.push_back(it->second);
+                        int tempminRPos = 0;
+                        minRPos.clear();
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);                    
+                    }else if(numDiff == minDiff){
+                        int tempminRPos = 0;
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);  
+                        minRGroup.push_back(it->second);
+                    }
+                    
+                }
+                
+                /*cout << minDiff << '\t' << minCount << '\t' << endl;
+                 for (int i = 0; i < minFGroup.size(); i++) { 
+                 cout << i << '\t';
+                 for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
+                 cout << endl;
+                 }
+                 cout << endl;
+                 for (int i = 0; i < minRGroup.size(); i++) { 
+                 cout << i << '\t';
+                 for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
+                 cout << endl;
+                 }
+                 cout << endl;*/
+                if(minDiff > pdiffs)   {       success = minDiff;              }       //no good matches
+                else  {  
+                    bool foundMatch = false;
+                    vector<int> matches;
+                    for (int i = 0; i < minFGroup.size(); i++) {
+                        for (int j = 0; j < minFGroup[i].size(); j++) {
+                            for (int k = 0; k < minRGroup.size(); k++) {
+                                if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+                            }
+                        }
+                    }
+                    
+                    int fStart = 0;
+                    int rStart = 0;
+                    if (matches.size() == 1) { 
+                        foundMatch = true;
+                        group = matches[0]; 
+                        fStart = minFPos[0];
+                        rStart = minRPos[0];
+                    }
+                    
+                    //we have an acceptable match for the forward and reverse, but do they match?
+                    if (foundMatch) {
+                        forwardSeq.setUnaligned(rawFSequence.substr(fStart));
+                        reverseSeq.setUnaligned(rawRSequence.substr(rStart));
+                        forwardQual.trimQScores(fStart, -1);
+                        reverseQual.trimQScores(rStart, -1);
+                        success = minDiff;
+                    }else { success = pdiffs + 100;    }
+                }
                        }
                        
                        if (alignment != NULL) {  delete alignment;  }
-                       
                }
                
                return success;
                
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimOligos", "stripRBarcode");
+               m->errorOut(e, "TrimOligos", "stripIForward");
                exit(1);
        }
        
 }
-/*******************************************************************
-int TrimOligos::stripRBarcode(Sequence& seq, int& group){
+//*******************************************************************/
+int TrimOligos::stripBarcode(Sequence& seq, int& group){
        try {
                
                string rawSequence = seq.getUnaligned();
                int success = bdiffs + 1;       //guilty until proven innocent
                
                //can you find the barcode
-               for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
+               for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
                        string oligo = it->first;
                        if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
                                success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
                                break;  
                        }
-            
-                       if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
                                group = it->second;
-                               seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
+                               seq.setUnaligned(rawSequence.substr(oligo.length()));
                                
                                success = 0;
                                break;
@@ -625,21 +744,9 @@ int TrimOligos::stripRBarcode(Sequence& seq, int& group){
                if ((bdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
-                       Alignment* alignment;
-                       if (rbarcodes.size() > 0) {
-                               map<string,int>::iterator it; 
-                               
-                               for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+            Alignment* alignment;
+                       if (barcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));  }
+                       else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -647,28 +754,28 @@ int TrimOligos::stripRBarcode(Sequence& seq, int& group){
                        int minGroup = -1;
                        int minPos = 0;
                        
-                       for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
+                       for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
                                string oligo = it->first;
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
+                               if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
                                        success = bdiffs + 10;
                                        break;
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
+                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
-                
+                                
                                int alnLength = oligo.length();
                                
-                               for(int i=0;i<alnLength;i++){
-                                       if(oligo[i] != '-'){    alnLength = i;  break;  }
+                               for(int i=oligo.length()-1;i>=0;i--){
+                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
                                }
-                               oligo = oligo.substr(alnLength);
-                               temp = temp.substr(alnLength);
-                               
+                               oligo = oligo.substr(0,alnLength);
+                               temp = temp.substr(0,alnLength);
+                                
                                int numDiff = countDiffs(oligo, temp);
                                
                                if(numDiff < minDiff){
@@ -676,7 +783,7 @@ int TrimOligos::stripRBarcode(Sequence& seq, int& group){
                                        minCount = 1;
                                        minGroup = it->second;
                                        minPos = 0;
-                                       for(int i=alnLength-1;i>=0;i--){
+                                       for(int i=0;i<alnLength;i++){
                                                if(temp[i] != '-'){
                                                        minPos++;
                                                }
@@ -692,8 +799,7 @@ int TrimOligos::stripRBarcode(Sequence& seq, int& group){
                        else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
                        else{                                                                                                   //use the best match
                                group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
-                
+                               seq.setUnaligned(rawSequence.substr(minPos));
                                success = minDiff;
                        }
                        
@@ -705,12 +811,13 @@ int TrimOligos::stripRBarcode(Sequence& seq, int& group){
                
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimOligos", "stripRBarcode");
+               m->errorOut(e, "TrimOligos", "stripBarcode");
                exit(1);
        }
        
 }
-//********************************************************************/
+
+/********************************************************************/
 int TrimOligos::stripForward(Sequence& seq, int& group){
        try {
                
@@ -737,21 +844,9 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
                if ((pdiffs == 0) || (success == 0)) {  return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (primers.size() > 0) {
-                               map<string,int>::iterator it; 
-                               
-                               for(it=primers.begin();it!=primers.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
+            else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -763,7 +858,7 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
                                string oligo = it->first;
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   
+                               if(rawSequence.length() < maxFPrimerLength){    
                                        success = pdiffs + 100;
                                        break;
                                }
@@ -849,21 +944,9 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, boo
                if ((pdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (primers.size() > 0) {
-                               map<string,int>::iterator it; 
-                               
-                               for(it=primers.begin();it!=primers.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
+                       else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -875,7 +958,7 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, boo
                                string oligo = it->first;
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   
+                               if(rawSequence.length() < maxFPrimerLength){    
                                        success = pdiffs + 100;
                                        break;
                                }
@@ -1024,19 +1107,9 @@ bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
                if ((ldiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (linker.size() > 0) {
-                               for(int i = 0; i < linker.size(); i++){
-                                       if(linker[i].length() > maxLength){
-                                               maxLength = linker[i].length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));   }     
+                       else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -1047,7 +1120,7 @@ bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
                                string oligo = linker[i];
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
+                               if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
                                        success = ldiffs + 10;
                                        break;
                                }
@@ -1133,19 +1206,9 @@ bool TrimOligos::stripLinker(Sequence& seq){
                if ((ldiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (linker.size() > 0) {
-                               for(int i = 0; i < linker.size(); i++){
-                                       if(linker[i].length() > maxLength){
-                                               maxLength = linker[i].length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));  }
+            else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -1156,7 +1219,7 @@ bool TrimOligos::stripLinker(Sequence& seq){
                                string oligo = linker[i];
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
+                               if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
                                        success = ldiffs + 10;
                                        break;
                                }
@@ -1240,19 +1303,9 @@ bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
                if ((sdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (spacer.size() > 0) {
-                               for(int i = 0; i < spacer.size(); i++){
-                                       if(spacer[i].length() > maxLength){
-                                               maxLength = spacer[i].length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
+            else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -1263,7 +1316,7 @@ bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
                                string oligo = spacer[i];
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
+                               if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
                                        success = sdiffs + 10;
                                        break;
                                }
@@ -1349,19 +1402,9 @@ bool TrimOligos::stripSpacer(Sequence& seq){
                if ((sdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
                        Alignment* alignment;
-                       if (spacer.size() > 0) {
-                               for(int i = 0; i < spacer.size(); i++){
-                                       if(spacer[i].length() > maxLength){
-                                               maxLength = spacer[i].length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
+                       if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
+            else{ alignment = NULL; } 
                        
                        //can you find the barcode
                        int minDiff = 1e6;
@@ -1372,7 +1415,7 @@ bool TrimOligos::stripSpacer(Sequence& seq){
                                string oligo = spacer[i];
                                //                              int length = oligo.length();
                                
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
+                               if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
                                        success = sdiffs + 10;
                                        break;
                                }
index fb8f74dcb4f309387ccf3caa923354a466c9fccb..6716deb464eb0198b00159490d1f33572c6c6d23 100644 (file)
@@ -29,7 +29,7 @@ class TrimOligos {
        public:
         TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
         TrimOligos(int,int, int, int, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer
-        TrimOligos(int,int, int, int, map<int, oligosPair>, map<int, oligosPair>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, linker, spacer
+        TrimOligos(int,int, int, int, map<int, oligosPair>, map<int, oligosPair>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes
                ~TrimOligos();
        
                int stripBarcode(Sequence&, int&);      
@@ -58,8 +58,14 @@ class TrimOligos {
                vector<string> revPrimer;
         vector<string> linker;
         vector<string> spacer;
-        map<int, oligosPair> ibarcodes;
-        map<int, oligosPair> iprimers;
+        map<string, vector<int> > ifbarcodes;
+        map<string, vector<int> > ifprimers;
+        map<string, vector<int> > irbarcodes;
+        map<string, vector<int> > irprimers;
+        map<int, oligosPair> ipbarcodes;
+        map<int, oligosPair> ipprimers;
+    
+        int maxFBarcodeLength, maxRBarcodeLength, maxFPrimerLength, maxRPrimerLength, maxLinkerLength, maxSpacerLength;
        
                MothurOut* m;
        
index 0c21c8993552929a5299841a5089783f7b8c2aef..ba368fc8569fdfde5d3587ffc5a1e65240e8acdf 100644 (file)
@@ -1036,10 +1036,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                HANDLE  hThreadArray[processors-1]; 
                
                //Create processor worker threads.
-               for( int i=0; i<processors-1; i++){
+               for( int h=0; h<processors-1; h++){
                        
             string extension = "";
-                       if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
+                       if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
             vector<vector<string> > tempNameFileNames = nameFileNames;
@@ -1081,14 +1081,14 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               tempFASTAFileNames,
                                               tempPrimerQualFileNames,
                                               tempNameFileNames,
-                                              lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
+                                              lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
-                       hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+                       hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
                }
         
         //parent do my part
@@ -1103,8 +1103,32 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->openOutputFile(trimNameFileName, temp);              temp.close();
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
+        vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+        vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+        vector<vector<string> > tempNameFileNames = nameFileNames;
+        if(allFiles){
+            ofstream temp;
+            string extension = toString(processors-1) + ".temp";
+            for(int i=0;i<tempFASTAFileNames.size();i++){
+                for(int j=0;j<tempFASTAFileNames[i].size();j++){
+                    if (tempFASTAFileNames[i][j] != "") {
+                        tempFASTAFileNames[i][j] += extension;
+                        m->openOutputFile(tempFASTAFileNames[i][j], temp);                     temp.close();
+                        
+                        if(qFileName != ""){
+                            tempPrimerQualFileNames[i][j] += extension;
+                            m->openOutputFile(tempPrimerQualFileNames[i][j], temp);            temp.close();
+                        }
+                        if(nameFile != ""){
+                            tempNameFileNames[i][j] += extension;
+                            m->openOutputFile(tempNameFileNames[i][j], temp);          temp.close();
+                        }
+                    }
+                }
+            }
+        }
         
-               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
         processIDS.push_back(processors-1);
 
         
index cbec7490bfe028ea02de0b4a1e024b1ce2d89e1f..e698fffb58e435ddae9d3f496f5f19d33923c5c2 100644 (file)
@@ -384,16 +384,20 @@ int UnifracWeightedCommand::execute() {
             //subsample loop
             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
-                
                 if (m->control_pressed) { break; }
                 
                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
                 CountTable* newCt = new CountTable();
                 
+                int sampleTime = 0;
+                if (m->debug) { sampleTime = time(NULL); }
+                
                 //uses method of setting groups to doNotIncludeMe
                 SubSample sample;
                 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
-               
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
+                
                 //call new weighted function
                 vector<double> iterData; iterData.resize(numComp,0);
                 Weighted thisWeighted(includeRoot);