]> git.donarmstrong.com Git - mothur.git/commitdiff
added dups parameter to chimera.uchime. working on make.contigs command.
authorSarah Westcott <mothur.westcott@gmail.com>
Tue, 9 Oct 2012 12:56:20 +0000 (08:56 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Tue, 9 Oct 2012 12:56:20 +0000 (08:56 -0400)
12 files changed:
chimerauchimecommand.cpp
chimerauchimecommand.h
makecontigscommand.cpp
makecontigscommand.h
removeseqscommand.cpp
removeseqscommand.h
sffinfocommand.cpp
sffinfocommand.h
trimoligos.cpp
trimoligos.h
trimseqscommand.cpp
trimseqscommand.h

index 7ff3989dea527060934b417de22ef5078df7a074..478156814cb4e429bebc7f7f1c4f4d55db148d4a 100644 (file)
@@ -35,6 +35,8 @@ vector<string> ChimeraUchimeCommand::setParameters(){
                CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
                CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
                CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
+        CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
+
                //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
                CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
                CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
@@ -59,12 +61,13 @@ string ChimeraUchimeCommand::getHelpString(){
                string helpString = "";
                helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
                helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
-               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
+               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dups, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
+        helpString += "If the dups parameter is true, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=t.\n";
                helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
                helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
@@ -557,6 +560,15 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
 
                        temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
                        skipgaps2 = m->isTrue(temp); 
+            
+            string usedDups = "true";
+                       temp = validParameter.validFile(parameters, "dups", false);     
+                       if (temp == "not found") { 
+                               if (groupfile != "")    {  temp = "true";                                       }
+                               else                    {  temp = "false"; usedDups = "";       }
+                       }
+                       dups = m->isTrue(temp);
+
                        
                        if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                        if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
@@ -713,12 +725,14 @@ int ChimeraUchimeCommand::execute(){
                                if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
                 if (hasCount) { delete cparser; }
                 else { delete sparser; }
-
-                               int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
-                               
-                               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
-                               m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                
+                if (dups) { 
+                    int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
                                
+                    m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");     m->mothurOutEndLine();
+                    m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                               }
+                
                                if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
                                        
                        }else{
@@ -800,7 +814,7 @@ int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, s
                        //find unique name
                        itUnique = uniqueNames.find(name);
                        
-                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
                        else {
                                itChimeras = chimerasInFile.find((itUnique->second));
                                
@@ -1168,11 +1182,8 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                vector<char*> cPara;
                
                string uchimeCommand = uchimeLocation;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               uchimeCommand += " ";
-#else
-               uchimeCommand = "\"" + uchimeCommand + "\"";
-#endif
+        uchimeCommand = "\"" + uchimeCommand + "\" ";
+        
         char* tempUchime;
                tempUchime= new char[uchimeCommand.length()+1]; 
                *tempUchime = '\0';
index 39c3141a1bf95e6b4506887771c2bfaa85834327..a5b42751dfd74ea3eb1fc3dc4ccf9bfa5bb9822b 100644 (file)
@@ -47,7 +47,7 @@ private:
        int driver(string, string, string, string, int&);
        int createProcesses(string, string, string, string, int&);
                
-       bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName;
+       bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
        string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
        int processors;
        
@@ -758,6 +758,8 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
                for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
                //int numArgs = cPara.size();
                
+        commandString = "\"" + commandString + "\"";
+        
                //uchime_main(numArgs, uchimeParameters); 
                //cout << "commandString = " << commandString << endl;
         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
index 691d706ab00efa8823412753b72f08e5198873b1..4ae25ce0e2623ec9db0dc6568a37df7e64382140 100644 (file)
@@ -13,7 +13,15 @@ vector<string> MakeContigsCommand::setParameters(){
        try {
                CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
         CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
-               CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
+               CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+
+        CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
+        CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
                CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
                CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
                CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
@@ -37,15 +45,22 @@ string MakeContigsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
-               helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
+        helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
+               helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
                helpString += "The ffastq and rfastq parameters are required.\n";
                helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+               helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+               helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
                helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
                helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
                helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
         helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
+        helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
         helpString += "The make.contigs command should be in the following format: \n";
                helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
                helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
@@ -68,6 +83,7 @@ string MakeContigsCommand::getOutputFileNameTag(string type, string inputName=""
         else {
             if (type == "fasta")             {   outputFileName =  "contigs.fasta";         }
             else if (type == "qfile")        {   outputFileName =  "contigs.qual";          }
+            else if (type == "group")            {   outputFileName =  "groups";   }
             else if (type == "mismatch")     {   outputFileName =  "contigs.mismatch";      }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
@@ -86,6 +102,7 @@ MakeContigsCommand::MakeContigsCommand(){
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["group"] = tempOutNames;
         outputTypes["mismatch"] = tempOutNames;
        }
        catch(exception& e) {
@@ -121,6 +138,7 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
             outputTypes["mismatch"] = tempOutNames;
+            outputTypes["group"] = tempOutNames;
                        
             
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
@@ -143,6 +161,14 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("oligos");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                               }
             }
             
             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
@@ -153,6 +179,11 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
                        else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
             
+            oligosfile = validParameter.validFile(parameters, "oligos", true);
+                       if (oligosfile == "not found")      {   oligosfile = "";        }
+                       else if(oligosfile == "not open")   {   abort = true;       } 
+                       else {   m->setOligosFile(oligosfile);          }
+            
             //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
                        
@@ -182,6 +213,26 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
+            
+            temp = validParameter.validFile(parameters, "bdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, bdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, pdiffs);
+            
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+                       m->mothurConvert(temp, tdiffs);
+                       
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+
+            temp = validParameter.validFile(parameters, "allfiles", false);            if (temp == "not found") { temp = "F"; }
+                       allFiles = m->isTrue(temp);
                        
                        align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
                        if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
@@ -239,6 +290,12 @@ int MakeContigsCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
                }
+        
+        string currentGroup = "";
+               itTypes = outputTypes.find("group");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
+               }
                
         //output files created by command
                m->mothurOutEndLine();
@@ -246,7 +303,6 @@ int MakeContigsCommand::execute(){
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                m->mothurOutEndLine();
 
-        
         return 0;
     }
        catch(exception& e) {
@@ -700,6 +756,262 @@ bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
         exit(1);
     }
 }
+//***************************************************************************************************************
+//illumina data requires paired forward and reverse data
+//BARCODE   atgcatgc   atgcatgc    groupName 
+//PRIMER   atgcatgc   atgcatgc    groupName  
+//PRIMER   atgcatgc   atgcatgc  
+bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
+       try {
+               ifstream in;
+               m->openInputFile(oligosfile, in);
+               
+               ofstream test;
+               
+               string type, foligo, roligo, group;
+        
+               int indexPrimer = 0;
+               int indexBarcode = 0;
+        set<string> uniquePrimers;
+        set<string> uniqueBarcodes;
+               
+               while(!in.eof()){
+            
+                       in >> type; 
+            
+                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
+            
+                       if(type[0] == '#'){
+                               while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
+                               m->gobble(in);
+                       }
+                       else{
+                               m->gobble(in);
+                               //make type case insensitive
+                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
+                               
+                               in >> foligo;
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
+                               
+                               for(int i=0;i<foligo.length();i++){
+                                       foligo[i] = toupper(foligo[i]);
+                                       if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
+                               }
+                               
+                               if(type == "PRIMER"){
+                                       m->gobble(in);
+                                       
+                    in >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    roligo = reverseOligo(roligo);
+                    
+                    group = "";
+                    
+                                       // get rest of line in case there is a primer name
+                                       while (!in.eof())       {       
+                                               char c = in.get(); 
+                                               if (c == 10 || c == 13){        break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       } 
+                    
+                    oligosPair newPrimer(foligo, roligo);
+                                       
+                                       //check for repeat barcodes
+                    string tempPair = foligo+roligo;
+                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
+                    else { uniquePrimers.insert(tempPair); }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
+                    
+                                       primers[indexPrimer]=newPrimer; indexPrimer++;          
+                                       primerNameVector.push_back(group);
+                               }else if(type == "BARCODE"){
+                                       m->gobble(in);
+                                       
+                    in >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    roligo = reverseOligo(roligo);
+                    
+                    oligosPair newPair(foligo, roligo);
+                    
+                    group = "";
+                    while (!in.eof())  {       
+                                               char c = in.get(); 
+                                               if (c == 10 || c == 13){        break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       } 
+                                       
+                    if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+                        
+                    //check for repeat barcodes
+                    string tempPair = foligo+roligo;
+                    if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
+                    else { uniqueBarcodes.insert(tempPair); }
+                        
+                    barcodes[indexBarcode]=newPair; indexBarcode++;
+                                       barcodeNameVector.push_back(group);
+                               }else if(type == "LINKER"){
+                                       linker.push_back(foligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(foligo);
+                               }
+                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
+                       }
+                       m->gobble(in);
+               }       
+               in.close();
+               
+               if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
+               
+               //add in potential combos
+               if(barcodeNameVector.size() == 0){
+            oligosPair temp("", "");
+                       barcodes[0] = temp;
+                       barcodeNameVector.push_back("");                        
+               }
+               
+               if(primerNameVector.size() == 0){
+            oligosPair temp("", "");
+                       primers[0] = temp;
+                       primerNameVector.push_back("");                 
+               }
+               
+               fastaFileNames.resize(barcodeNameVector.size());
+               for(int i=0;i<fastaFileNames.size();i++){
+                       fastaFileNames[i].assign(primerNameVector.size(), "");
+               }
+               qualFileNames = fastaFileNames; 
+               
+               if(allFiles){
+                       set<string> uniqueNames; //used to cleanup outputFileNames
+                       for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                               for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                                       
+                                       string primerName = primerNameVector[itPrimer->first];
+                                       string barcodeName = barcodeNameVector[itBar->first];
+                                       
+                                       string comboGroupName = "";
+                                       string fastaFileName = "";
+                                       string qualFileName = "";
+                                       string nameFileName = "";
+                    string countFileName = "";
+                                       
+                                       if(primerName == ""){
+                                               comboGroupName = barcodeNameVector[itBar->first];
+                                       }
+                                       else{
+                                               if(barcodeName == ""){
+                                                       comboGroupName = primerNameVector[itPrimer->first];
+                                               }
+                                               else{
+                                                       comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                                               }
+                                       }
+                                       
+                                       
+                                       ofstream temp;
+                                       fastaFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + comboGroupName + ".fasta";
+                                       if (uniqueNames.count(fastaFileName) == 0) {
+                                               outputNames.push_back(fastaFileName);
+                                               outputTypes["fasta"].push_back(fastaFileName);
+                                               uniqueNames.insert(fastaFileName);
+                                       }
+                                       
+                                       fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
+                                       m->openOutputFile(fastaFileName, temp);         temp.close();
+                                       
+                                       
+                    qualFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + comboGroupName + ".qual";
+                    if (uniqueNames.count(qualFileName) == 0) {
+                        outputNames.push_back(qualFileName);
+                        outputTypes["qfile"].push_back(qualFileName);
+                    }
+                                               
+                    qualFileNames[itBar->first][itPrimer->first] = qualFileName;
+                    m->openOutputFile(qualFileName, temp);             temp.close();
+                               }
+                       }
+               }
+               
+               bool allBlank = true;
+               for (int i = 0; i < barcodeNameVector.size(); i++) {
+                       if (barcodeNameVector[i] != "") {
+                               allBlank = false;
+                               break;
+                       }
+               }
+               for (int i = 0; i < primerNameVector.size(); i++) {
+                       if (primerNameVector[i] != "") {
+                               allBlank = false;
+                               break;
+                       }
+               }
+        
+               if (allBlank) {
+                       m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
+                       allFiles = false;
+                       return false;
+               }
+               
+               return true;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "getOligos");
+               exit(1);
+       }
+}
+//********************************************************************/
+string MakeContigsCommand::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "reverseOligo");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 
 
index 2308b657acf7951be6373a527f44546b555a7d2f..84e43c01c9a0b8c7775c6a46494a6fc0fcb642a6 100644 (file)
@@ -17,7 +17,7 @@
 #include "needlemanoverlap.hpp"
 #include "blastalign.hpp"
 #include "noalign.hpp"
-
+#include "trimoligos.h"
 
 struct fastqRead {
        vector<int> scores;
@@ -50,17 +50,31 @@ public:
     void help() { m->mothurOut(getHelpString()); }     
     
 private:
-    bool abort;
-    string outputDir, ffastqfile, rfastqfile, align;
+    bool abort, allFiles;
+    string outputDir, ffastqfile, rfastqfile, align, oligosfile;
        float match, misMatch, gapOpen, gapExtend;
-       int processors, longestBase, threshold;
+       int processors, longestBase, threshold, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
     vector<string> outputNames;
     
+    map<int, oligosPair> barcodes;
+       map<int, oligosPair> primers;
+    vector<string>  linker;
+    vector<string>  spacer;
+       vector<string> primerNameVector;        
+       vector<string> barcodeNameVector;       
+    
+       map<string, int> groupCounts;  
+    //map<string, int> combos;
+       //map<string, int> groupToIndex;
+    //vector<string> groupVector;
+    
     fastqRead readFastq(ifstream&);
     vector< vector<string> > readFastqFiles(int&);
     bool checkReads(fastqRead&, fastqRead&);
     int createProcesses(vector< vector<string> >, string, string, string);
     int driver(vector<string>, string, string, string);
+    bool getOligos(vector<vector<string> >&, vector<vector<string> >&);
+    string reverseOligo(string);
 };
 
 /**************************************************************************************************/
index 73873f3a4744eb0e9ce59a9b2640bf013f524852..00b94a9dac842ceeca4a2a0362d87f14ba12ca1b 100644 (file)
@@ -406,6 +406,12 @@ int RemoveSeqsCommand::readFasta(){
                        if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
                        
                        Sequence currSeq(in);
+            
+            if (!dups) {//adjust name if needed
+                map<string, string>::iterator it = uniqueMap.find(currSeq.getName());
+                if (it != uniqueMap.end()) { currSeq.setName(it->second); }
+            }
+
                        name = currSeq.getName();
                        
                        if (name != "") {
@@ -413,7 +419,7 @@ int RemoveSeqsCommand::readFasta(){
                                if (names.count(name) == 0) {
                                        wroteSomething = true;
                                        
-                                       currSeq.printSequence(out);
+                    currSeq.printSequence(out);
                                }else {  removedCount++;  }
                        }
                        m->gobble(in);
@@ -477,6 +483,11 @@ int RemoveSeqsCommand::readQual(){
                        
                        m->gobble(in);
                        
+            if (!dups) {//adjust name if needed
+                map<string, string>::iterator it = uniqueMap.find(saveName);
+                if (it != uniqueMap.end()) { name = ">" + it->second; saveName = it->second; }
+            }
+            
                        if (names.count(saveName) == 0) {
                                wroteSomething = true;
                                
@@ -695,6 +706,8 @@ int RemoveSeqsCommand::readName(){
                                                wroteSomething = true;
                                                
                                                out << validSecond[0] << '\t';
+                        //we are changing the unique name in the fasta file
+                        uniqueMap[firstCol] = validSecond[0];
                                                
                                                //you know you have at least one valid second since first column is valid
                                                for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
@@ -788,9 +801,15 @@ int RemoveSeqsCommand::readTax(){
                        in >> name;                             //read from first column
                        in >> tax;                      //read from second column
                        
+            if (!dups) {//adjust name if needed
+                map<string, string>::iterator it = uniqueMap.find(name);
+                if (it != uniqueMap.end()) { name = it->second; }
+            }
+            
                        //if this name is in the accnos file
                        if (names.count(name) == 0) {
                                wroteSomething = true;
+            
                                out << name << '\t' << tax << endl;
                        }else {  removedCount++;  }
                                        
@@ -840,6 +859,11 @@ int RemoveSeqsCommand::readAlign(){
                        if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
                        
                        in >> name;                             //read from first column
+            
+            if (!dups) {//adjust name if needed
+                map<string, string>::iterator it = uniqueMap.find(name);
+                if (it != uniqueMap.end()) { name = it->second; }
+            }
                        
                        //if this name is in the accnos file
                        if (names.count(name) == 0) {
index 151b413070bc64bafd80834e72228dabd1037c43..e26e751032402cd39dac933acf3f57814ab2fc0f 100644 (file)
@@ -37,6 +37,7 @@ class RemoveSeqsCommand : public Command {
                string accnosfile, fastafile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
                vector<string> outputNames;
+        map<string, string> uniqueMap;
                
                int readFasta();
                int readName();
index d7ca230449fbebd3488a512c5fbd25d3159c1156..c50255aeb2ae9c202074b78d89d460651cefdc43 100644 (file)
@@ -1037,7 +1037,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H
 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {
        try {
         //find group read belongs to
-        TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
+        TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
         
         int success = 1;
         string trashCode = "";
@@ -1082,12 +1082,6 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
             else{ currentSeqsDiffs += success;  }
         }
         
-        if(rbarcodes.size() != 0){
-            success = trimOligos.stripRBarcode(currSeq, currQual, barcode);
-            if(success > bdiffs)               {       trashCode += 'b';       }
-            else{ currentSeqsDiffs += success;  }
-        }
-        
         if(numSpacers != 0){
             success = trimOligos.stripSpacer(currSeq, currQual);
             if(success > sdiffs)               {       trashCode += 's';       }
@@ -1675,36 +1669,13 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                }
                                else if(type == "BARCODE"){
                                        inOligos >> group;
-                    
-                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
-                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
-                                       string temp = "";
-                    while (!inOligos.eof())    {       
-                                               char c = inOligos.get(); 
-                                               if (c == 10 || c == 13){        break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  temp += c;  }
-                                       } 
                                        
-                    //then this is illumina data with 4 columns
-                    if (temp != "") {  
-                        string reverseBarcode = reverseOligo(group); //reverse barcode
-                        group = temp;
-                        
-                        //check for repeat barcodes
-                        map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
-                        if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
-                                               
-                        rbarcodes[reverseBarcode]=indexBarcode; 
-                    }
-                    
                                        //check for repeat barcodes
                                        map<string, int>::iterator itBar = barcodes.find(oligo);
                                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
                     
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
-
                                }else if(type == "LINKER"){
                                        linker.push_back(oligo);
                                }else if(type == "SPACER"){
index dd3c57fa747646a94d26cebd9420fbbf4525f3f6..4917a274029f778e957791a744d66b5ff059890c 100644 (file)
@@ -84,7 +84,6 @@ private:
        int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs;
        set<string> seqNames;
     map<string, int> barcodes;
-    map<string, int> rbarcodes;
     map<string, int> primers;
     vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
     vector<vector<int> > numSplitReads;
index 2f92cc847ca75e1a0983aeef2d09e9eaab68e57f..8f4cbe98a7488b9d552f07b6ffd17aeb5e858c23 100644 (file)
@@ -14,7 +14,7 @@
 
 /********************************************************************/
 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
-TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, map<string, int> rbr, vector<string> r, vector<string> lk, vector<string> sp){
+TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
        try {
                m = MothurOut::getInstance();
                
@@ -24,7 +24,6 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
         sdiffs = s;
                
                barcodes = br;
-        rbarcodes = rbr;
                primers = pr;
                revPrimer = r;
         linker = lk;
@@ -37,7 +36,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
 }
 /********************************************************************/
 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
-TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
+TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br, vector<string> lk, vector<string> sp){
        try {
                m = MothurOut::getInstance();
                
@@ -46,9 +45,8 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
         ldiffs = l;
         sdiffs = s;
                
-               barcodes = br;
-               primers = pr;
-               revPrimer = r;
+               ibarcodes = br;
+               iprimers = pr;
         linker = lk;
         spacer = sp;
        }
@@ -194,6 +192,177 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                m->errorOut(e, "TrimOligos", "stripBarcode");
                exit(1);
        }
+}
+//*******************************************************************/
+int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
+       try {
+               //look for forward barcode
+               string rawFSequence = forwardSeq.getUnaligned();
+        string rawRSequence = reverseSeq.getUnaligned();
+               int success = bdiffs + 1;       //guilty until proven innocent
+               
+               //can you find the forward barcode
+               for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
+                       string foligo = it->second.forward;
+            string roligo = it->second.reverse;
+            
+                       if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
+                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                               break;  
+                       }
+            if(rawRSequence.length() < roligo.length()){       //let's just assume that the barcodes are the same length
+                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                               break;  
+                       }
+                       
+                       if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+                               group = it->first;
+                               forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
+                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                forwardQual.trimQScores(foligo.length(), -1);
+                reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+                               success = 0;
+                               break;
+                       }
+               }
+               
+               //if you found the barcode or if you don't want to allow for diffs
+               if ((bdiffs == 0) || (success == 0)) { return success;  }
+               else { //try aligning and see if you can find it
+                       
+            //look for forward
+                       int maxLength = 0;
+                       
+                       Alignment* alignment;
+                       if (ibarcodes.size() > 0) {
+                               for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
+                                       if(it->second.forward.length() > maxLength){ maxLength = it->second.forward.length();  }
+                               }
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
+                       }else{ alignment = NULL; } 
+                       
+                       //can you find the barcode
+                       int minDiff = 1e6;
+                       int minCount = 1;
+                       int minFGroup = -1;
+                       int minFPos = 0;
+                       
+                       for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
+                               string oligo = it->second.forward;
+                               
+                               if(rawFSequence.length() < maxLength){  //let's just assume that the barcodes are the same length
+                                       success = bdiffs + 10;
+                                       break;
+                               }
+                               
+                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                               alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
+                               oligo = alignment->getSeqAAln();
+                               string temp = alignment->getSeqBAln();
+                               
+                               int alnLength = oligo.length();
+                               
+                               for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
+                               oligo = oligo.substr(0,alnLength);
+                               temp = temp.substr(0,alnLength);
+                int numDiff = countDiffs(oligo, temp);
+                               
+                               if(numDiff < minDiff){
+                                       minDiff = numDiff;
+                                       minCount = 1;
+                                       minFGroup = it->first;
+                                       minFPos = 0;
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       minFPos++;
+                                               }
+                                       }
+                               }else if(numDiff == minDiff){
+                                       minCount++;
+                               }
+                               
+                       }
+                       
+                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
+                       else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
+                       else{   
+                //check for reverse match
+                if (alignment != NULL) {  delete alignment;  }
+                maxLength = 0;
+                
+                if (ibarcodes.size() > 0) {
+                    for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
+                        if(it->second.reverse.length() > maxLength){ maxLength = it->second.reverse.length();  }
+                    }
+                    alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
+                }else{ alignment = NULL; } 
+                
+                //can you find the barcode
+                minDiff = 1e6;
+                minCount = 1;
+                int minRGroup = -1;
+                int minRPos = 0;
+                
+                for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
+                    string oligo = it->second.reverse;
+                    
+                    if(rawRSequence.length() < maxLength){     //let's just assume that the barcodes are the same length
+                        success = bdiffs + 10;
+                        break;
+                    }
+                    
+                    //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                    alignment->align(oligo, rawRSequence.substr((rawRSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
+                    oligo = alignment->getSeqAAln();
+                    string temp = alignment->getSeqBAln();
+                    
+                    int alnLength = oligo.length();
+                    for(int i=0;i<alnLength;i++){ if(oligo[i] != '-'){ alnLength = i;  break;  } }
+                    oligo = oligo.substr(0,alnLength);
+                    temp = temp.substr(0,alnLength);
+                    int numDiff = countDiffs(oligo, temp);
+                    
+                    if(numDiff < minDiff){
+                        minDiff = numDiff;
+                        minCount = 1;
+                        minRGroup = it->first;
+                        minRPos = 0;
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                minRPos++;
+                            }
+                        }
+                    }else if(numDiff == minDiff){
+                        minCount++;
+                    }
+                    
+                }
+
+                if(minDiff > bdiffs)   {       success = minDiff;              }       //no good matches
+                else if(minCount > 1)  {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
+                else{
+                    //we have an acceptable match for the forward and reverse, but do they match?
+                    if (minFGroup == minRGroup) {
+                        group = minFGroup;
+                        forwardSeq.setUnaligned(rawFSequence.substr(minFPos));
+                        reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-minRPos)));
+                        forwardQual.trimQScores(minFPos, -1);
+                        reverseQual.trimQScores(-1, rawRSequence.length()-minRPos);
+                        success = minDiff;
+                    }else { success = bdiffs + 100;    }
+                }
+                       }
+                       
+                       if (alignment != NULL) {  delete alignment;  }
+               }
+               
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripIBarcode");
+               exit(1);
+       }
        
 }
 //*******************************************************************/
@@ -308,7 +477,7 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){
        }
        
 }
-//*******************************************************************/
+/*******************************************************************
 int TrimOligos::stripRBarcode(Sequence& seq, QualityScores& qual, int& group){
        try {
                
@@ -428,7 +597,7 @@ int TrimOligos::stripRBarcode(Sequence& seq, QualityScores& qual, int& group){
        }
        
 }
-//*******************************************************************/
+/*******************************************************************
 int TrimOligos::stripRBarcode(Sequence& seq, int& group){
        try {
                
index a32b3d8e4f2d388b15b3aa68ed66fa61f33c1681..fb8f74dcb4f309387ccf3caa923354a466c9fccb 100644 (file)
 #include "sequence.hpp"
 #include "qualityscores.h"
 
+struct oligosPair {
+       string forward;
+       string reverse;
+       
+       oligosPair() { forward = ""; reverse = "";  }
+       oligosPair(string f, string r) : forward(f), reverse(r) {}
+       ~oligosPair() {}
+};
 
 class TrimOligos {
        
        public:
         TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
-        TrimOligos(int,int, int, int, map<string, int>, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimers, linker, spacer
-        TrimOligos(int,int, int, int, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimers, linker, spacer
+        TrimOligos(int,int, int, int, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer
+        TrimOligos(int,int, int, int, map<int, oligosPair>, map<int, oligosPair>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, linker, spacer
                ~TrimOligos();
        
                int stripBarcode(Sequence&, int&);      
                int stripBarcode(Sequence&, QualityScores&, int&);
-    
-        int stripRBarcode(Sequence&, int&);    
-        int stripRBarcode(Sequence&, QualityScores&, int&);
-       
+        int stripBarcode(Sequence&, Sequence&, QualityScores&, QualityScores&, int&);
+       
                int stripForward(Sequence&, int&);
                int stripForward(Sequence&, QualityScores&, int&, bool);
+        int stripForward(Sequence&, Sequence&, QualityScores&, QualityScores&, int&);
        
                bool stripReverse(Sequence&);
                bool stripReverse(Sequence&, QualityScores&);
@@ -47,11 +54,12 @@ class TrimOligos {
                int pdiffs, bdiffs, ldiffs, sdiffs;
        
                map<string, int> barcodes;
-        map<string, int> rbarcodes;
                map<string, int> primers;
                vector<string> revPrimer;
         vector<string> linker;
         vector<string> spacer;
+        map<int, oligosPair> ibarcodes;
+        map<int, oligosPair> iprimers;
        
                MothurOut* m;
        
index bbb0b367a59de85b94e6dd9e561b697d37832b00..0c21c8993552929a5299841a5089783f7b8c2aef 100644 (file)
@@ -660,7 +660,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                int count = 0;
                bool moreSeqs = 1;
-               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
+               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
        
                while (moreSeqs) {
                                
@@ -704,12 +704,6 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        if(success > bdiffs)            {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
-                
-                if(rbarcodes.size() != 0){
-                                       success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
-                                       if(success > bdiffs)            {       trashCode += 'b';       }
-                                       else{ currentSeqsDiffs += success;  }
-                               }
                                
                 if(numSpacers != 0){
                                        success = trimOligos.stripSpacer(currSeq, currQual);
@@ -1088,7 +1082,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               tempPrimerQualFileNames,
                                               tempNameFileNames,
                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
-                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
+                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
@@ -1435,40 +1429,11 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                }
                                else if(type == "BARCODE"){
                                        inOligos >> group;
-                    
-                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
-                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
-                                       string temp = "";
-                    while (!inOligos.eof())    {       
-                                               char c = inOligos.get(); 
-                                               if (c == 10 || c == 13){        break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  temp += c;  }
-                                       } 
                                        
-                    //then this is illumina data with 4 columns
-                    if (temp != "") {  
-                        
-                        for(int i=0;i<group.length();i++){
-                            group[i] = toupper(group[i]);
-                            if(group[i] == 'U')        {       group[i] = 'T'; }
-                        }
-                        
-                        if (m->debug) { m->mothurOut("[DEBUG]: reading reverse " + group + ", and group = " + temp + ".\n"); }
-                        
-                        string reverseBarcode = reverseOligo(group); //reverse barcode
-                        //check for repeat barcodes
-                        map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
-                        if (itBar != rbarcodes.end()) { m->mothurOut("reverse barcode " + group + " is in your oligos file already."); m->mothurOutEndLine();  }
-                        
-                        group = temp;
-                        rbarcodes[reverseBarcode]=indexBarcode; 
-                    }else { if (m->debug) { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); } }
-                        
                                        //check for repeat barcodes
                                        map<string, int>::iterator itBar = barcodes.find(oligo);
                                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                                               
+                    
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
                                }else if(type == "LINKER"){
index 8d9a57a54ffd0db053956200602f3aa06fb4de5b..1ffad218ccee951e7ac9097d04f84327099fda9d 100644 (file)
@@ -61,7 +61,6 @@ private:
        vector<string> revPrimer, outputNames;
        set<string> filesToRemove;
        map<string, int> barcodes;
-    map<string, int> rbarcodes;
        vector<string> groupVector;
        map<string, int> primers;
     vector<string>  linker;
@@ -102,7 +101,6 @@ struct trimData {
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
     vector<string> revPrimer;
        map<string, int> barcodes;
-    map<string, int> rbarcodes;
        map<string, int> primers;
     map<string, int> nameCount;
     vector<string>  linker;
@@ -116,7 +114,7 @@ struct trimData {
     
        trimData(){}
        trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
-                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, map<string, int> rbar, vector<string> revP, vector<string> li, vector<string> spa, 
+                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, 
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
                       int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
@@ -149,7 +147,6 @@ struct trimData {
         sdiffs = sd;
         tdiffs = td;
         barcodes = bar;
-        rbarcodes = rbar;
         primers = pri;      numFPrimers = primers.size();
         revPrimer = revP;   numRPrimers = revPrimer.size();
         linker = li;        numLinkers = linker.size();
@@ -254,7 +251,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                }
                
                
-               TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->rbarcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
+               TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
         
                pDataArray->count = pDataArray->lineEnd;
                for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
@@ -299,12 +296,6 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                        else{ currentSeqsDiffs += success;  }
                                }
                 
-                               if(pDataArray->rbarcodes.size() != 0){
-                                       success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
-                                       if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
-                                       else{ currentSeqsDiffs += success;  }
-                               }
-                
                 if(pDataArray->numSpacers != 0){
                                        success = trimOligos.stripSpacer(currSeq, currQual);
                                        if(success > pDataArray->sdiffs)                {       trashCode += 's';       }