]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.h
added count file to trim.seqs, get.groups, get.lineage, get.seqs, heatmap.sim, list...
[mothur.git] / trimseqscommand.h
index 957f37a65bfb1c177f000c9c6d031ddfc3784c81..8d9a57a54ffd0db053956200602f3aa06fb4de5b 100644 (file)
@@ -14,8 +14,8 @@
 #include "command.hpp"
 #include "sequence.hpp"
 #include "qualityscores.h"
-#include "groupmap.h"
 #include "trimoligos.h"
+#include "counttable.h"
 
 
 class TrimSeqsCommand : public Command {
@@ -36,16 +36,13 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
-       
-       GroupMap* groupMap;
-    
     struct linePair {
         unsigned long long start;
         unsigned long long end;
         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
         linePair() {}
     };
-
+    
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
        bool keepFirstTrim(Sequence&, QualityScores&);
        bool removeLastTrim(Sequence&, QualityScores&);
@@ -55,7 +52,7 @@ private:
     string reverseOligo(string);
 
        bool abort, createGroup;
-       string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir;
+       string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
        bool flip, allFiles, qtrim, keepforward;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
@@ -75,13 +72,15 @@ private:
        vector<string> barcodeNameVector;       //needed here?
        map<string, int> groupCounts;  
        map<string, string> nameMap;
+    map<string, int> nameCount; //for countfile name -> repCount
+    map<string, string> groupMap; //for countfile name -> group
 
        vector<int> processIDS;   //processid
        vector<linePair> lines;
        vector<linePair> qLines;
        
-       int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
-       int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
+       int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
+       int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
        int setLines(string, string);
 };
 
@@ -92,7 +91,7 @@ private:
 struct trimData {
     unsigned long long start, end;
     MothurOut* m;
-    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, groupFileName, nameFile;
+    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
        vector<vector<string> > fastaFileNames;
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
@@ -105,6 +104,7 @@ struct trimData {
        map<string, int> barcodes;
     map<string, int> rbarcodes;
        map<string, int> primers;
+    map<string, int> nameCount;
     vector<string>  linker;
     vector<string>  spacer;
        map<string, int> combos;
@@ -112,22 +112,26 @@ struct trimData {
        vector<string> barcodeNameVector;       
        map<string, int> groupCounts;  
        map<string, string> nameMap;
+    map<string, string> groupMap;
     
        trimData(){}
-       trimData(string fn, string qn, string nf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
+       trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, map<string, int> rbar, vector<string> revP, vector<string> li, vector<string> spa, 
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
-                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm) {
+                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
         qFileName = qn;
         nameFile = nf;
+        countfile = cf;
         trimFileName = tn;
         scrapFileName = sn;
         trimQFileName = tqn;
         scrapQFileName = sqn;
         trimNFileName = tnn;
         scrapNFileName = snn;
+        trimCFileName = tcn;
+        scrapCFileName = scn;
         groupFileName = gn;
         fastaFileNames = ffn;
         qualFileNames = qfn;
@@ -137,6 +141,7 @@ struct trimData {
         qlineStart = qstart;
         qlineEnd = qend;
                m = mout;
+        nameCount = ncount;
         
         pdiffs = pd;
         bdiffs = bd;
@@ -203,7 +208,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                
                
                ofstream outGroupsFile;
-               if (pDataArray->createGroup){   pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
+               if ((pDataArray->createGroup) && (pDataArray->countfile == "")){        pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
                if(pDataArray->allFiles){
                        for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
@@ -222,6 +227,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        }
                }
                
+        ofstream trimCountFile;
+               ofstream scrapCountFile;
+               if(pDataArray->countfile != ""){
+                       pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
+                       pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
+            if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
+               }
+        
                ifstream inFASTA;
                pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
                if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
@@ -248,7 +261,11 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                   
                        if (pDataArray->m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (pDataArray->createGroup) {   outGroupsFile.close();   }
+                               if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
+                if(pDataArray->qFileName != "")        {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
+                if(pDataArray->nameFile != "") {       scrapNameFile.close(); trimNameFile.close();    }
+                if(pDataArray->countfile != "")        {       scrapCountFile.close(); trimCountFile.close();  }
+
                                if(pDataArray->qFileName != ""){ qFile.close(); }
                                return 0;
                        }
@@ -399,6 +416,15 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
                                        
+                    int numRedundants = 0;
+                    if (pDataArray->countfile != "") {
+                        map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+                        if (itCount != pDataArray->nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                            numRedundants = itCount->second-1;
+                        }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+                    }
+                                       
                                        if (pDataArray->createGroup) {
                                                if(pDataArray->barcodes.size() != 0){
                                                        string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
@@ -412,9 +438,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                                } 
                                                        }
                                                        
-                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+                            else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
                                                        
-                            int numRedundants = 0;
                                                        if (pDataArray->nameFile != "") {
                                                                map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
                                                                if (itName != pDataArray->nameMap.end()) { 
@@ -462,6 +488,12 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
+                    if (pDataArray->countfile != "") {
+                        map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+                        if (itCount != pDataArray->nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                        }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+                    }
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
                                        currSeq.setUnaligned(origSeq);
                                        currSeq.setAligned(origSeq);