]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
trim.seqs linker and spacer mods
[mothur.git] / trimseqscommand.cpp
index 4d6b45da96664677f61908874d79b8d847f0e74c..ae9f707aa2fe5ace59e59d1697a8bf6e50319421 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "trimseqscommand.h"
 #include "needlemanoverlap.hpp"
+#include "trimoligos.h"
 
 //**********************************************************************************************************************
 vector<string> TrimSeqsCommand::setParameters(){       
@@ -24,9 +25,12 @@ vector<string> TrimSeqsCommand::setParameters(){
                CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-               CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
+               CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
                CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
                CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
                CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
@@ -63,9 +67,11 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
                helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
-               helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
+               helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                helpString += "The qfile parameter allows you to provide a quality file.\n";
                helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
                helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
@@ -74,6 +80,7 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
                helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
                helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
+               helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
                helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
                helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
@@ -189,6 +196,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else if (fastaFile == "not open") { abort = true; }    
+                       else { m->setFastaFile(fastaFile); }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -201,50 +209,56 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        // ...at some point should added some additional type checking...
                        string temp;
                        temp = validParameter.validFile(parameters, "flip", false);
-                       if (temp == "not found"){       flip = 0;       }
-                       else if(m->isTrue(temp))        {       flip = 1;       }
+                       if (temp == "not found")    {   flip = 0;       }
+                       else {  flip = m->isTrue(temp);         }
                
                        temp = validParameter.validFile(parameters, "oligos", true);
                        if (temp == "not found"){       oligoFile = "";         }
                        else if(temp == "not open"){    abort = true;   } 
-                       else                                    {       oligoFile = temp;               }
+                       else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
                        
                        
                        temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
-                       convert(temp, maxAmbig);  
+                       m->mothurConvert(temp, maxAmbig);  
 
                        temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
-                       convert(temp, maxHomoP);  
+                       m->mothurConvert(temp, maxHomoP);  
 
                        temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
-                       convert(temp, minLength); 
+                       m->mothurConvert(temp, minLength); 
                        
                        temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
-                       convert(temp, maxLength);
+                       m->mothurConvert(temp, maxLength);
                        
                        temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
-                       convert(temp, bdiffs);
+                       m->mothurConvert(temp, bdiffs);
                        
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
-                       convert(temp, pdiffs);
+                       m->mothurConvert(temp, pdiffs);
+            
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
                        
-                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
-                       convert(temp, tdiffs);
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+                       m->mothurConvert(temp, tdiffs);
                        
-                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
                        
                        temp = validParameter.validFile(parameters, "qfile", true);     
                        if (temp == "not found")        {       qFileName = "";         }
                        else if(temp == "not open")     {       abort = true;           }
-                       else                                            {       qFileName = temp;       }
+                       else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
                        
                        temp = validParameter.validFile(parameters, "name", true);      
                        if (temp == "not found")        {       nameFile = "";          }
                        else if(temp == "not open")     {       nameFile = "";  abort = true;           }
-                       else                                            {       nameFile = temp;        }
+                       else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
-                       convert(temp, qThreshold);
+                       m->mothurConvert(temp, qThreshold);
                        
                        temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
                        qtrim = m->isTrue(temp);
@@ -272,10 +286,13 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
                        allFiles = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
+                       keepforward = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors); 
+                       m->mothurConvert(temp, processors); 
                        
                        
                        if(allFiles && (oligoFile == "")){
@@ -290,6 +307,11 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
                                abort = true;
                        }
+                       
+                       if (nameFile == "") {
+                               vector<string> files; files.push_back(fastaFile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
@@ -307,6 +329,7 @@ int TrimSeqsCommand::execute(){
                
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
+               createGroup = false;
                vector<vector<string> > fastaFileNames;
                vector<vector<string> > qualFileNames;
                vector<vector<string> > nameFileNames;
@@ -342,13 +365,15 @@ int TrimSeqsCommand::execute(){
                
                string outputGroupFileName;
                if(oligoFile != ""){
-                       outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
-                       outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
-                       getOligos(fastaFileNames, qualFileNames, nameFileNames);
+                       createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
+                       if (createGroup) {
+                               outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+                               outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
+                       }
                }
                
-               vector<unsigned long int> fastaFilePos;
-               vector<unsigned long int> qFilePos;
+               vector<unsigned long long> fastaFilePos;
+               vector<unsigned long long> qFilePos;
                
                setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
                
@@ -379,16 +404,16 @@ int TrimSeqsCommand::execute(){
                                        if (fastaFileNames[i][j] != "") {
                                                if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
                                                        if(m->isBlank(fastaFileNames[i][j])){
-                                                               remove(fastaFileNames[i][j].c_str());
+                                                               m->mothurRemove(fastaFileNames[i][j]);
                                                                namesToRemove.insert(fastaFileNames[i][j]);
                                                        
                                                                if(qFileName != ""){
-                                                                       remove(qualFileNames[i][j].c_str());
+                                                                       m->mothurRemove(qualFileNames[i][j]);
                                                                        namesToRemove.insert(qualFileNames[i][j]);
                                                                }
                                                                
                                                                if(nameFile != ""){
-                                                                       remove(nameFileNames[i][j].c_str());
+                                                                       m->mothurRemove(nameFileNames[i][j]);
                                                                        namesToRemove.insert(nameFileNames[i][j]);
                                                                }
                                                        }else{  
@@ -427,7 +452,7 @@ int TrimSeqsCommand::execute(){
                        }
                }
                
-               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
+               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
 
                //output group counts
                m->mothurOutEndLine();
@@ -438,7 +463,7 @@ int TrimSeqsCommand::execute(){
                }
                if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
                
-               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
+               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
 
                //set fasta file as new current fastafile
                string current = "";
@@ -504,7 +529,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                
                ofstream outGroupsFile;
-               if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
+               if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
                if(allFiles){
                        for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
@@ -535,17 +560,18 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                int count = 0;
                bool moreSeqs = 1;
+               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
        
                while (moreSeqs) {
                                
                        if (m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (oligoFile != "") {   outGroupsFile.close();   }
+                               if (createGroup) {       outGroupsFile.close();   }
 
                                if(qFileName != ""){
                                        qFile.close();
                                }
-                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
+                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
 
                                return 0;
                        }
@@ -555,25 +581,36 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        int currentSeqsDiffs = 0;
 
                        Sequence currSeq(inFASTA); m->gobble(inFASTA);
+                       //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
                        QualityScores currQual;
                        if(qFileName != ""){
                                currQual = QualityScores(qFile);  m->gobble(qFile);
                        }
-
+                       
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
                                
                                int barcodeIndex = 0;
                                int primerIndex = 0;
                                
+                if(numLinkers != 0){
+                                       success = trimOligos.stripLinker(currSeq, currQual);
+                                       if(!success)                            {       trashCode += 'k';       }
+                               }
+                
                                if(barcodes.size() != 0){
-                                       success = stripBarcode(currSeq, currQual, barcodeIndex);
+                                       success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
                                        if(success > bdiffs)            {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
                                
+                if(numSpacers != 0){
+                                       success = trimOligos.stripSpacer(currSeq, currQual);
+                                       if(!success)                            {       trashCode += 's';       }
+                               }
+                
                                if(numFPrimers != 0){
-                                       success = stripForward(currSeq, currQual, primerIndex);
+                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
                                        if(success > pdiffs)            {       trashCode += 'f';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
@@ -581,7 +618,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
                                
                                if(numRPrimers != 0){
-                                       success = stripReverse(currSeq, currQual);
+                                       success = trimOligos.stripReverse(currSeq, currQual);
                                        if(!success)                            {       trashCode += 'r';       }
                                }
 
@@ -603,7 +640,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
                                        else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
                                        else                                            {       success = 1;                            }
-                               
+                                       
                                        //you don't want to trim, if it fails above then scrap it
                                        if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
                                        
@@ -644,30 +681,39 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
                                        
-                                       if(barcodes.size() != 0){
-                                               string thisGroup = barcodeNameVector[barcodeIndex];
-                                               if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
-                                               
-                                               outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
-                                               
-                                               if (nameFile != "") {
-                                                       map<string, string>::iterator itName = nameMap.find(currSeq.getName());
-                                                       if (itName != nameMap.end()) { 
-                                                               vector<string> thisSeqsNames; 
-                                                               m->splitAtChar(itName->second, thisSeqsNames, ',');
-                                                               for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
-                                                                       outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
-                                                               }
-                                                       }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
-                                               }
-                                               
-                                               map<string, int>::iterator it = groupCounts.find(thisGroup);
-                                               if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
-                                               else { groupCounts[it->first]++; }
+                                       if (createGroup) {
+                                               if(barcodes.size() != 0){
+                                                       string thisGroup = barcodeNameVector[barcodeIndex];
+                                                       if (primers.size() != 0) { 
+                                                               if (primerNameVector[primerIndex] != "") { 
+                                                                       if(thisGroup != "") {
+                                                                               thisGroup += "." + primerNameVector[primerIndex]; 
+                                                                       }else {
+                                                                               thisGroup = primerNameVector[primerIndex]; 
+                                                                       }
+                                                               } 
+                                                       }
+                                                       
+                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
                                                        
+                                                       if (nameFile != "") {
+                                                               map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                                                               if (itName != nameMap.end()) { 
+                                                                       vector<string> thisSeqsNames; 
+                                                                       m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                                                       for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                                                               outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
+                                                                       }
+                                                               }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
+                                                       }
+                                                       
+                                                       map<string, int>::iterator it = groupCounts.find(thisGroup);
+                                                       if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
+                                                       else { groupCounts[it->first]++; }
+                                                               
+                                               }
                                        }
                                        
-                                       
                                        if(allFiles){
                                                ofstream output;
                                                m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
@@ -708,7 +754,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        }
                        
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               unsigned long int pos = inFASTA.tellg();
+                               unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= line->end)) { break; }
                        
                        #else
@@ -726,7 +772,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                inFASTA.close();
                trimFASTAFile.close();
                scrapFASTAFile.close();
-               if (oligoFile != "") {   outGroupsFile.close();   }
+               if (createGroup) {       outGroupsFile.close();   }
                if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
                if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
                
@@ -798,7 +844,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                                 qLines[process]);
                                
                                //pass groupCounts to parent
-                               if(oligoFile != ""){
+                               if(createGroup){
                                        ofstream out;
                                        string tempFile = filename + toString(getpid()) + ".num.temp";
                                        m->openOutputFile(tempFile, out);
@@ -845,27 +891,27 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
                        
                        m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
-                       remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
                        m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
-                       remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
                        
                        if(qFileName != ""){
                                m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
-                               remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
                                m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
-                               remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
                        }
                        
                        if(nameFile != ""){
                                m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
-                               remove((trimNameFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
                                m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
-                               remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
                        }
                        
-                       if(oligoFile != ""){
+                       if(createGroup){
                                m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
-                               remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
                        }
                        
                        
@@ -874,23 +920,23 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                        for(int k=0;k<fastaFileNames[j].size();k++){
                                                if (fastaFileNames[j][k] != "") {
                                                        m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
-                                                       remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                                       m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
                                                        
                                                        if(qFileName != ""){
                                                                m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
-                                                               remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                                               m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
                                                        }
                                                        
                                                        if(nameFile != ""){
                                                                m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
-                                                               remove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                                               m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
                                                        }
                                                }
                                        }
                                }
                        }
                        
-                       if(oligoFile != ""){
+                       if(createGroup){
                                ifstream in;
                                string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
                                m->openInputFile(tempFile, in);
@@ -908,7 +954,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                else { groupCounts[it->first] += tempNum; }
                                        }
                                }
-                               in.close(); remove(tempFile.c_str());
+                               in.close(); m->mothurRemove(tempFile);
                        }
                        
                }
@@ -924,7 +970,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
 
 /**************************************************************************************************/
 
-int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
+int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
        try {
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                //set file positions for fasta file
@@ -963,7 +1009,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
                                        map<string, int>::iterator it = firstSeqNames.find(sname);
                                        
                                        if(it != firstSeqNames.end()) { //this is the start of a new chunk
-                                               unsigned long int pos = inQual.tellg(); 
+                                               unsigned long long pos = inQual.tellg(); 
                                                qfileFilePos.push_back(pos - input.length() - 1);       
                                                firstSeqNames.erase(it);
                                        }
@@ -985,7 +1031,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
 
                //get last file position of qfile
                FILE * pFile;
-               unsigned long int size;
+               unsigned long long size;
                
                //get num bytes in file
                pFile = fopen (qfilename.c_str(),"rb");
@@ -1003,33 +1049,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
                #else
                
                        fastaFilePos.push_back(0); qfileFilePos.push_back(0);
-                       //get last file position of fastafile
-                       FILE * pFile;
-                       unsigned long int size;
-                       
-                       //get num bytes in file
-                       pFile = fopen (filename.c_str(),"rb");
-                       if (pFile==NULL) perror ("Error opening file");
-                       else{
-                               fseek (pFile, 0, SEEK_END);
-                               size=ftell (pFile);
-                               fclose (pFile);
-                       }
-                       fastaFilePos.push_back(size);
-                       
-                       //get last file position of fastafile
-                       FILE * qFile;
-                       
-                       //get num bytes in file
-                       qFile = fopen (qfilename.c_str(),"rb");
-                       if (qFile==NULL) perror ("Error opening file");
-                       else{
-                               fseek (qFile, 0, SEEK_END);
-                               size=ftell (qFile);
-                               fclose (qFile);
-                       }
-                       qfileFilePos.push_back(size);
-               
+                       fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
                        return 1;
                
                #endif
@@ -1042,7 +1062,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
 
 //***************************************************************************************************************
 
-void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
+bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
        try {
                ifstream inOligos;
                m->openInputFile(oligoFile, inOligos);
@@ -1106,6 +1126,10 @@ void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                                
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
                                }
                                else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
                        }
@@ -1197,286 +1221,37 @@ void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                }
                numFPrimers = primers.size();
                numRPrimers = revPrimer.size();
-
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "getOligos");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-
-int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
-       try {
+        numLinkers = linker.size();
+        numSpacers = spacer.size();
                
-               string rawSequence = seq.getUnaligned();
-               int success = bdiffs + 1;       //guilty until proven innocent
-               
-               //can you find the barcode
-               for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
-                       string oligo = it->first;
-                       if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
-                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
-                               break;  
-                       }
-                       
-                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
-                               group = it->second;
-                               seq.setUnaligned(rawSequence.substr(oligo.length()));
-                               
-                               if(qual.getName() != ""){
-                                       qual.trimQScores(oligo.length(), -1);
-                               }
-                               
-                               success = 0;
+               bool allBlank = true;
+               for (int i = 0; i < barcodeNameVector.size(); i++) {
+                       if (barcodeNameVector[i] != "") {
+                               allBlank = false;
                                break;
                        }
                }
-               
-               //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
-               
-               else { //try aligning and see if you can find it
-
-                       int maxLength = 0;
-
-                       Alignment* alignment;
-                       if (barcodes.size() > 0) {
-                               map<string,int>::iterator it=barcodes.begin();
-
-                               for(it;it!=barcodes.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-
-                       }else{ alignment = NULL; } 
-                       
-                       //can you find the barcode
-                       int minDiff = 1e6;
-                       int minCount = 1;
-                       int minGroup = -1;
-                       int minPos = 0;
-                       
-                       for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
-                               string oligo = it->first;
-//                             int length = oligo.length();
-                               
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
-                                       success = bdiffs + 10;
-                                       break;
-                               }
-                               
-                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
-                               oligo = alignment->getSeqAAln();
-                               string temp = alignment->getSeqBAln();
-               
-                               int alnLength = oligo.length();
-                               
-                               for(int i=oligo.length()-1;i>=0;i--){
-                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
-                               }
-                               oligo = oligo.substr(0,alnLength);
-                               temp = temp.substr(0,alnLength);
-                               
-                               int numDiff = countDiffs(oligo, temp);
-                               
-                               if(numDiff < minDiff){
-                                       minDiff = numDiff;
-                                       minCount = 1;
-                                       minGroup = it->second;
-                                       minPos = 0;
-                                       for(int i=0;i<alnLength;i++){
-                                               if(temp[i] != '-'){
-                                                       minPos++;
-                                               }
-                                       }
-                               }
-                               else if(numDiff == minDiff){
-                                       minCount++;
-                               }
-
-                       }
-
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
-                       else{                                                                                                   //use the best match
-                               group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(minPos));
-                               
-                               if(qual.getName() != ""){
-                                       qual.trimQScores(minPos, -1);
-                               }
-                               success = minDiff;
-                       }
-                       
-                       if (alignment != NULL) {  delete alignment;  }
-                       
-               }
-               
-               return success;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
-               exit(1);
-       }
-
-}
-
-//***************************************************************************************************************
-
-int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
-       try {
-               string rawSequence = seq.getUnaligned();
-               int success = pdiffs + 1;       //guilty until proven innocent
-               
-               //can you find the primer
-               for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
-                       string oligo = it->first;
-                       if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
-                               success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
-                               break;  
-                       }
-                       
-                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
-                               group = it->second;
-                               seq.setUnaligned(rawSequence.substr(oligo.length()));
-                               if(qual.getName() != ""){
-                                       qual.trimQScores(oligo.length(), -1);
-                               }
-                               success = 0;
+               for (int i = 0; i < primerNameVector.size(); i++) {
+                       if (primerNameVector[i] != "") {
+                               allBlank = false;
                                break;
                        }
                }
-
-               //if you found the barcode or if you don't want to allow for diffs
-               if ((pdiffs == 0) || (success == 0)) { return success;  }
                
-               else { //try aligning and see if you can find it
-
-                       int maxLength = 0;
-
-                       Alignment* alignment;
-                       if (primers.size() > 0) {
-                               map<string,int>::iterator it=primers.begin();
-
-                               for(it;it!=primers.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
-
-                       }else{ alignment = NULL; } 
-                       
-                       //can you find the barcode
-                       int minDiff = 1e6;
-                       int minCount = 1;
-                       int minGroup = -1;
-                       int minPos = 0;
-                       
-                       for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
-                               string oligo = it->first;
-//                             int length = oligo.length();
-                               
-                               if(rawSequence.length() < maxLength){   
-                                       success = pdiffs + 100;
-                                       break;
-                               }
-                               
-                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
-                               oligo = alignment->getSeqAAln();
-                               string temp = alignment->getSeqBAln();
-               
-                               int alnLength = oligo.length();
-                               
-                               for(int i=oligo.length()-1;i>=0;i--){
-                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
-                               }
-                               oligo = oligo.substr(0,alnLength);
-                               temp = temp.substr(0,alnLength);
-                               
-                               int numDiff = countDiffs(oligo, temp);
-                               
-                               if(numDiff < minDiff){
-                                       minDiff = numDiff;
-                                       minCount = 1;
-                                       minGroup = it->second;
-                                       minPos = 0;
-                                       for(int i=0;i<alnLength;i++){
-                                               if(temp[i] != '-'){
-                                                       minPos++;
-                                               }
-                                       }
-                               }
-                               else if(numDiff == minDiff){
-                                       minCount++;
-                               }
-
-                       }
-
-                       if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
-                       else{                                                                                                   //use the best match
-                               group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(minPos));
-                               if(qual.getName() != ""){
-                                       qual.trimQScores(minPos, -1);
-                               }
-                               success = minDiff;
-                       }
-                       
-                       if (alignment != NULL) {  delete alignment;  }
-                       
+               if (allBlank) {
+                       m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
+                       allFiles = false;
+                       return false;
                }
                
-               return success;
-
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "stripForward");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-
-bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
-       try {
-               string rawSequence = seq.getUnaligned();
-               bool success = 0;       //guilty until proven innocent
-               
-               for(int i=0;i<numRPrimers;i++){
-                       string oligo = revPrimer[i];
-                       
-                       if(rawSequence.length() < oligo.length()){
-                               success = 0;
-                               break;
-                       }
-                       
-                       if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
-                               seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
-                               if(qual.getName() != ""){
-                                       qual.trimQScores(-1, rawSequence.length()-oligo.length());
-                               }
-                               success = 1;
-                               break;
-                       }
-               }       
-               return success;
+               return true;
                
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "stripReverse");
+               m->errorOut(e, "TrimSeqsCommand", "getOligos");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
 
 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
@@ -1582,80 +1357,4 @@ bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
        }
        
 }
-
-//***************************************************************************************************************
-
-bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
-       try {
-               bool success = 1;
-               int length = oligo.length();
-               
-               for(int i=0;i<length;i++){
-                       
-                       if(oligo[i] != seq[i]){
-                               if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
-                               else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
-                               else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
-                               else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
-                               
-                               if(success == 0)        {       break;   }
-                       }
-                       else{
-                               success = 1;
-                       }
-               }
-               
-               return success;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
-               exit(1);
-       }
-
-}
-
-//***************************************************************************************************************
-
-int TrimSeqsCommand::countDiffs(string oligo, string seq){
-       try {
-
-               int length = oligo.length();
-               int countDiffs = 0;
-               
-               for(int i=0;i<length;i++){
-                                                               
-                       if(oligo[i] != seq[i]){
-                               if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
-                               else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
-                               else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
-                               else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
-                               else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
-                               else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
-                       }
-                       
-               }
-               
-               return countDiffs;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "countDiffs");
-               exit(1);
-       }
-
-}
-
 //***************************************************************************************************************