]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.h
moved mothur's source into a folder to make grabbing just the source easier on github
[mothur.git] / trimseqscommand.h
diff --git a/trimseqscommand.h b/trimseqscommand.h
deleted file mode 100644 (file)
index 9ba64c4..0000000
+++ /dev/null
@@ -1,492 +0,0 @@
-#ifndef TRIMSEQSCOMMAND_H
-#define TRIMSEQSCOMMAND_H
-
-/*
- *  trimseqscommand.h
- *  Mothur
- *
- *  Created by Pat Schloss on 6/6/09.
- *  Copyright 2009 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "mothur.h"
-#include "command.hpp"
-#include "sequence.hpp"
-#include "qualityscores.h"
-#include "groupmap.h"
-#include "trimoligos.h"
-
-
-class TrimSeqsCommand : public Command {
-public:
-       TrimSeqsCommand(string);
-       TrimSeqsCommand();
-       ~TrimSeqsCommand(){}
-       
-       vector<string> setParameters();
-       string getCommandName()                 { return "trim.seqs";   }
-       string getCommandCategory()             { return "Sequence Processing";         }
-       string getHelpString(); 
-       string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; }
-       string getDescription()         { return "provides the preprocessing features needed to screen and sort pyrosequences"; }
-
-       int execute(); 
-       void help() { m->mothurOut(getHelpString()); }  
-       
-private:
-       
-       GroupMap* groupMap;
-    
-    struct linePair {
-        unsigned long long start;
-        unsigned long long end;
-        linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
-        linePair() {}
-    };
-
-       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
-       bool keepFirstTrim(Sequence&, QualityScores&);
-       bool removeLastTrim(Sequence&, QualityScores&);
-       bool cullLength(Sequence&);
-       bool cullHomoP(Sequence&);
-       bool cullAmbigs(Sequence&);
-    string reverseOligo(string);
-
-       bool abort, createGroup;
-       string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir;
-       
-       bool flip, allFiles, qtrim, keepforward;
-       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
-       int qWindowSize, qWindowStep, keepFirst, removeLast;
-       double qRollAverage, qThreshold, qWindowAverage, qAverage;
-       vector<string> revPrimer, outputNames;
-       set<string> filesToRemove;
-       map<string, int> barcodes;
-       vector<string> groupVector;
-       map<string, int> primers;
-    vector<string>  linker;
-    vector<string>  spacer;
-       map<string, int> combos;
-       map<string, int> groupToIndex;
-       vector<string> primerNameVector;        //needed here?
-       vector<string> barcodeNameVector;       //needed here?
-       map<string, int> groupCounts;  
-       map<string, string> nameMap;
-
-       vector<int> processIDS;   //processid
-       vector<linePair> lines;
-       vector<linePair> qLines;
-       
-       int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
-       int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
-       int setLines(string, string);
-};
-
-/**************************************************************************************************/
-//custom data structure for threads to use.
-// This is passed by void pointer so it can be any data type
-// that can be passed using a single void pointer (LPVOID).
-struct trimData {
-    unsigned long long start, end;
-    MothurOut* m;
-    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, groupFileName, nameFile;
-       vector<vector<string> > fastaFileNames;
-    vector<vector<string> > qualFileNames;
-    vector<vector<string> > nameFileNames;
-    unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
-    bool flip, allFiles, qtrim, keepforward, createGroup;
-       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
-       int qWindowSize, qWindowStep, keepFirst, removeLast, count;
-       double qRollAverage, qThreshold, qWindowAverage, qAverage;
-    vector<string> revPrimer;
-       map<string, int> barcodes;
-       map<string, int> primers;
-    vector<string>  linker;
-    vector<string>  spacer;
-       map<string, int> combos;
-       vector<string> primerNameVector;        
-       vector<string> barcodeNameVector;       
-       map<string, int> groupCounts;  
-       map<string, string> nameMap;
-    
-       trimData(){}
-       trimData(string fn, string qn, string nf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
-                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, 
-                      vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
-                      int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
-                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm) {
-        filename = fn;
-        qFileName = qn;
-        nameFile = nf;
-        trimFileName = tn;
-        scrapFileName = sn;
-        trimQFileName = tqn;
-        scrapQFileName = sqn;
-        trimNFileName = tnn;
-        scrapNFileName = snn;
-        groupFileName = gn;
-        fastaFileNames = ffn;
-        qualFileNames = qfn;
-        nameFileNames = nfn;
-        lineStart = lstart;
-        lineEnd = lend;
-        qlineStart = qstart;
-        qlineEnd = qend;
-               m = mout;
-        
-        pdiffs = pd;
-        bdiffs = bd;
-        ldiffs = ld;
-        sdiffs = sd;
-        tdiffs = td;
-        barcodes = bar;
-        primers = pri;      numFPrimers = primers.size();
-        revPrimer = revP;   numRPrimers = revPrimer.size();
-        linker = li;        numLinkers = linker.size();
-        spacer = spa;       numSpacers = spacer.size();
-        primerNameVector = priNameVector;
-        barcodeNameVector = barNameVector;
-        
-        createGroup = cGroup;
-        allFiles = aFiles;
-        keepforward = keepF;
-        keepFirst = keepfi;
-        removeLast = removeL;
-        qWindowStep = WindowStep;
-        qWindowSize = WindowSize;
-        qWindowAverage = WindowAverage;
-        qtrim = trim;
-        qThreshold = Threshold;
-        qAverage = Average;
-        qRollAverage = RollAverage;
-        minLength = minL;
-        maxAmbig = maxA;
-        maxHomoP = maxH;
-        maxLength = maxL;
-        flip = fli;
-        nameMap = nm;
-        count = 0;
-       }
-};
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ 
-       trimData* pDataArray;
-       pDataArray = (trimData*)lpParam;
-       
-       try {
-        ofstream trimFASTAFile;
-               pDataArray->m->openOutputFile(pDataArray->trimFileName, trimFASTAFile);
-               
-               ofstream scrapFASTAFile;
-               pDataArray->m->openOutputFile(pDataArray->scrapFileName, scrapFASTAFile);
-               
-               ofstream trimQualFile;
-               ofstream scrapQualFile;
-               if(pDataArray->qFileName != ""){
-                       pDataArray->m->openOutputFile(pDataArray->trimQFileName, trimQualFile);
-                       pDataArray->m->openOutputFile(pDataArray->scrapQFileName, scrapQualFile);
-               }
-               
-               ofstream trimNameFile;
-               ofstream scrapNameFile;
-               if(pDataArray->nameFile != ""){
-                       pDataArray->m->openOutputFile(pDataArray->trimNFileName, trimNameFile);
-                       pDataArray->m->openOutputFile(pDataArray->scrapNFileName, scrapNameFile);
-               }
-               
-               
-               ofstream outGroupsFile;
-               if (pDataArray->createGroup){   pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
-               if(pDataArray->allFiles){
-                       for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
-                               for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
-                                       if (pDataArray->fastaFileNames[i][j] != "") {
-                                               ofstream temp;
-                                               pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
-                                               if(pDataArray->qFileName != ""){
-                                                       pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                   temp.close();
-                                               }
-                                               
-                                               if(pDataArray->nameFile != ""){
-                                                       pDataArray->m->openOutputFile(pDataArray->nameFileNames[i][j], temp);                   temp.close();
-                                               }
-                                       }
-                               }
-                       }
-               }
-               
-               ifstream inFASTA;
-               pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
-               if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
-                       inFASTA.seekg(0);
-               }else { //this accounts for the difference in line endings. 
-                       inFASTA.seekg(pDataArray->lineStart-1); pDataArray->m->gobble(inFASTA); 
-               }
-               
-               ifstream qFile;
-               if(pDataArray->qFileName != "") {
-                       pDataArray->m->openInputFile(pDataArray->qFileName, qFile);
-                       if ((pDataArray->qlineStart == 0) || (pDataArray->qlineStart == 1)) {
-                qFile.seekg(0);
-            }else { //this accounts for the difference in line endings. 
-                qFile.seekg(pDataArray->qlineStart-1); pDataArray->m->gobble(qFile); 
-            } 
-               }
-               
-               
-               TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
-        
-               pDataArray->count = pDataArray->lineEnd;
-               for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
-                                  
-                       if (pDataArray->m->control_pressed) { 
-                               inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (pDataArray->createGroup) {   outGroupsFile.close();   }
-                               if(pDataArray->qFileName != ""){ qFile.close(); }
-                               return 0;
-                       }
-                       
-                       int success = 1;
-                       string trashCode = "";
-                       int currentSeqsDiffs = 0;
-            
-                       Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
-                       
-                       QualityScores currQual;
-                       if(pDataArray->qFileName != ""){
-                               currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
-                       }
-                       
-                       string origSeq = currSeq.getUnaligned();
-                       if (origSeq != "") {
-                               
-                               int barcodeIndex = 0;
-                               int primerIndex = 0;
-                               
-                if(pDataArray->numLinkers != 0){
-                                       success = trimOligos.stripLinker(currSeq, currQual);
-                                       if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
-                                       else{ currentSeqsDiffs += success;  }
-                               }
-                
-                               if(pDataArray->barcodes.size() != 0){
-                                       success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
-                                       if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
-                                       else{ currentSeqsDiffs += success;  }
-                               }
-                               
-                if(pDataArray->numSpacers != 0){
-                                       success = trimOligos.stripSpacer(currSeq, currQual);
-                                       if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
-                                       else{ currentSeqsDiffs += success;  }
-
-                               }
-                
-                               if(pDataArray->numFPrimers != 0){
-                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
-                                       if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
-                                       else{ currentSeqsDiffs += success;  }
-                               }
-                               
-                               if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
-                               
-                               if(pDataArray->numRPrimers != 0){
-                                       success = trimOligos.stripReverse(currSeq, currQual);
-                                       if(!success)                            {       trashCode += 'r';       }
-                               }
-                
-                               if(pDataArray->keepFirst != 0){
-                                       //success = keepFirstTrim(currSeq, currQual);
-                    success = 1;
-                    if(currQual.getName() != ""){
-                        currQual.trimQScores(-1, pDataArray->keepFirst);
-                    }
-                    currSeq.trim(pDataArray->keepFirst);
-                               }
-                               
-                               if(pDataArray->removeLast != 0){
-                                       //success = removeLastTrim(currSeq, currQual);
-                    success = 0;
-                    int length = currSeq.getNumBases() - pDataArray->removeLast;
-                    
-                    if(length > 0){
-                        if(currQual.getName() != ""){
-                            currQual.trimQScores(-1, length);
-                        }
-                        currSeq.trim(length);
-                        success = 1;
-                    }
-                    else{ success = 0; }
-                    
-                                       if(!success)                            {       trashCode += 'l';       }
-                               }
-                
-                               
-                               if(pDataArray->qFileName != ""){
-                                       int origLength = currSeq.getNumBases();
-                                       
-                                       if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
-                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
-                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
-                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
-                                       else                                            {       success = 1;                            }
-                                       
-                                       //you don't want to trim, if it fails above then scrap it
-                                       if ((!pDataArray->qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
-                                       
-                                       if(!success)                            {       trashCode += 'q';       }
-                               }                               
-                
-                               if(pDataArray->minLength > 0 || pDataArray->maxLength > 0){
-                                       //success = cullLength(currSeq);
-                    int length = currSeq.getNumBases();
-                    success = 0;       //guilty until proven innocent
-                    if(length >= pDataArray->minLength && pDataArray->maxLength == 0)                  {       success = 1;    }
-                    else if(length >= pDataArray->minLength && length <= pDataArray->maxLength)        {       success = 1;    }
-                    else                                                                                               {       success = 0;    }
-                    
-                                       if(!success)                            {       trashCode += 'l';       }
-                               }
-                               if(pDataArray->maxHomoP > 0){
-                                       //success = cullHomoP(currSeq);
-                    int longHomoP = currSeq.getLongHomoPolymer();
-                    success = 0;       //guilty until proven innocent
-                    if(longHomoP <= pDataArray->maxHomoP){     success = 1;    }
-                    else                                       {       success = 0;    }
-                    
-                                       if(!success)                            {       trashCode += 'h';       }
-                               }
-                               if(pDataArray->maxAmbig != -1){
-                                       //success = cullAmbigs(currSeq);
-                    int numNs = currSeq.getAmbigBases();
-                    success = 0;       //guilty until proven innocent
-                    if(numNs <= pDataArray->maxAmbig)  {       success = 1;    }
-                    else                                       {       success = 0;    }
-                                       if(!success)                            {       trashCode += 'n';       }
-                               }
-                               
-                               if(pDataArray->flip){           // should go last                       
-                                       currSeq.reverseComplement();
-                                       if(pDataArray->qFileName != ""){
-                                               currQual.flipQScores(); 
-                                       }
-                               }
-                               
-                               if(trashCode.length() == 0){
-                                       currSeq.setAligned(currSeq.getUnaligned());
-                                       currSeq.printSequence(trimFASTAFile);
-                                       
-                                       if(pDataArray->qFileName != ""){
-                                               currQual.printQScores(trimQualFile);
-                                       }
-                                       
-                                       if(pDataArray->nameFile != ""){
-                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
-                                               if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
-                                               else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
-                                       }
-                                       
-                                       if (pDataArray->createGroup) {
-                                               if(pDataArray->barcodes.size() != 0){
-                                                       string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
-                                                       if (pDataArray->primers.size() != 0) { 
-                                                               if (pDataArray->primerNameVector[primerIndex] != "") { 
-                                                                       if(thisGroup != "") {
-                                                                               thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
-                                                                       }else {
-                                                                               thisGroup = pDataArray->primerNameVector[primerIndex]; 
-                                                                       }
-                                                               } 
-                                                       }
-                                                       
-                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
-                                                       
-                                                       if (pDataArray->nameFile != "") {
-                                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
-                                                               if (itName != pDataArray->nameMap.end()) { 
-                                                                       vector<string> thisSeqsNames; 
-                                                                       pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
-                                                                       for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
-                                                                               outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
-                                                                       }
-                                                               }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                   
-                                                       }
-                                                       
-                                                       map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
-                                                       if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1; }
-                                                       else { pDataArray->groupCounts[it->first]++; }
-                            
-                                               }
-                                       }
-                                       
-                                       if(pDataArray->allFiles){
-                                               ofstream output;
-                                               pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
-                                               currSeq.printSequence(output);
-                                               output.close();
-                                               
-                                               if(pDataArray->qFileName != ""){
-                                                       pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
-                                                       currQual.printQScores(output);
-                                                       output.close();                                                 
-                                               }
-                                               
-                                               if(pDataArray->nameFile != ""){
-                                                       map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
-                                                       if (itName != pDataArray->nameMap.end()) { 
-                                                               pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
-                                                               output << itName->first << '\t' << itName->second << endl; 
-                                                               output.close();
-                                                       }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
-                                               }
-                                       }
-                               }
-                               else{
-                                       if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed
-                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
-                                               if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
-                                               else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
-                                       }
-                                       currSeq.setName(currSeq.getName() + '|' + trashCode);
-                                       currSeq.setUnaligned(origSeq);
-                                       currSeq.setAligned(origSeq);
-                                       currSeq.printSequence(scrapFASTAFile);
-                                       if(pDataArray->qFileName != ""){
-                                               currQual.printQScores(scrapQualFile);
-                                       }
-                               }
-                               
-                       }
-                       
-                       //report progress
-                       if((i) % 1000 == 0){    pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }
-                       
-               }
-               //report progress
-               if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
-               
-               
-               inFASTA.close();
-               trimFASTAFile.close();
-               scrapFASTAFile.close();
-               if (pDataArray->createGroup) {   outGroupsFile.close();   }
-               if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
-               if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
-               
-        return 0;
-            
-        }
-        catch(exception& e) {
-            pDataArray->m->errorOut(e, "TrimSeqsCommand", "MyTrimThreadFunction");
-            exit(1);
-        }
-    } 
-#endif
-    
-
-/**************************************************************************************************/
-
-#endif