git.donarmstrong.com Git - mothur.git/blobdiff - makecontigscommand.h
changes while testing
[mothur.git] / makecontigscommand.h
index 2fbc09e9fa5f464bcf56e1f5f324055f76d893a3..1ea38358c7ec4c56e2d1adb926f7767301daef9d 100644 (file)
@@ -32,9 +32,12 @@ struct fastqRead {
 struct pairFastqRead {
        fastqRead forward;
     fastqRead reverse;
+    fastqRead findex;  // NOTE(review): presumably the index read that accompanies `forward` (cf. findexfile) -- confirm
+    fastqRead rindex;  // NOTE(review): presumably the index read that accompanies `reverse` (cf. rindexfile) -- confirm
        
        pairFastqRead() {};
        pairFastqRead(fastqRead f, fastqRead r) : forward(f), reverse(r){};
+    pairFastqRead(fastqRead f, fastqRead r, fastqRead fi, fastqRead ri) : forward(f), reverse(r), findex(fi), rindex(ri) {};  // four-read form: additionally stores fi in findex and ri in rindex
        ~pairFastqRead() {};
 };
 /**************************************************************************************************/
@@ -59,8 +62,8 @@ public:
     void help() { m->mothurOut(getHelpString()); }     
     
 private:
-    bool abort, allFiles, createGroup, trimOverlap;
-    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format;
+    bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk;
+    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format;
        float match, misMatch, gapOpen, gapExtend;
        int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq;
     vector<string> outputNames;
@@ -75,19 +78,21 @@ private:
     
        map<string, int> groupCounts; 
     map<string, string> groupMap;
+    map<int, string> file2Group;
     
     vector<int> convertQual(string);
     fastqRead readFastq(ifstream&, bool&);
     vector< vector< vector<string> > > preProcessData(unsigned long int&);
     vector< vector<string> > readFileNames(string);
-    vector< vector<string> > readFastqFiles(unsigned long int&, string, string);
+    vector< vector<string> > readFastqFiles(unsigned long int&, string, string, string, string);
     vector< vector<string> > readFastaFiles(unsigned long int&, string, string);
     //bool checkReads(fastqRead&, fastqRead&, string, string);
-    int createProcesses(vector< vector<string> >, string, string, string, vector<vector<string> >);
-    int driver(vector<string>, string, string, string, vector<vector<string> >, int);
+    int createProcesses(vector< vector<string> >, string, string, string, vector<vector<string> >, int);
+    int driver(vector<string>, string, string, string, vector<vector<string> >, int, string);
     bool getOligos(vector<vector<string> >&, string);
     string reverseOligo(string);
-    vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques);
+    vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool);
+    vector<pairFastqRead> mergeReads(vector<pairFastqRead> frReads, vector<pairFastqRead> friReads, map<string, pairFastqRead>& pairUniques);
 };
 
 /**************************************************************************************************/
@@ -100,13 +105,13 @@ struct contigsData {
        string outputFasta; 
     string outputScrapFasta; 
        string outputMisMatches;
-       string align;
+       string align, group;
     vector<string> files;
     vector<vector<string> > fastaFileNames;
        MothurOut* m;
        float match, misMatch, gapOpen, gapExtend;
        int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq;
-    bool allFiles, createGroup, done, trimOverlap;
+    bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap;
     map<string, int> groupCounts; 
     map<string, string> groupMap;
     vector<string> primerNameVector;   
@@ -115,7 +120,7 @@ struct contigsData {
        map<int, oligosPair> primers;
        
        contigsData(){}
-       contigsData(vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, bool to, int tid) {
+       contigsData(string g, vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) {
         files = f;
                outputFasta = of;
         outputMisMatches = om;
@@ -126,6 +131,7 @@ struct contigsData {
                gapExtend = gapE; 
         insert = thr;
                align = al;
+        group = g;
                count = 0;
         outputScrapFasta = osf;
         fastaFileNames = ffn;
@@ -138,7 +144,8 @@ struct contigsData {
         tdiffs = tdf;
         allFiles = all;
         trimOverlap = to;
-        createGroup = cg;
+        createOligosGroup = cg;
+        createFileGroup = cfg;
                threadID = tid;
         deltaq = delt;
         done=false;
@@ -163,8 +170,10 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
         string thisfqualfile = pDataArray->files[1];
         string thisrfastafile = pDataArray->files[2];
         string thisrqualfile = pDataArray->files[3];
+        string thisfindexfile = pDataArray->files[4];
+        string thisrindexfile = pDataArray->files[5];
         
-        if (pDataArray->m->debug) {  pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+        if (pDataArray->m->debug) {  pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
         
                if(pDataArray->allFiles){
                        for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
@@ -177,7 +186,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                        }
                }
         
-        ifstream inFFasta, inRFasta, inFQual, inRQual;
+        ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
         ofstream outFasta, outMisMatch, outScrapFasta;
         pDataArray->m->openInputFile(thisffastafile, inFFasta);
         pDataArray->m->openInputFile(thisrfastafile, inRFasta);
@@ -185,11 +194,15 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
             pDataArray->m->openInputFile(thisfqualfile, inFQual);
             pDataArray->m->openInputFile(thisrqualfile, inRQual);
         }
+        
+        if (thisfindexfile != "") { pDataArray->m->openInputFile(thisfindexfile, inFIndex);  }
+        if (thisrindexfile != "") { pDataArray->m->openInputFile(thisrindexfile, inRIndex);  }
+        
         pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
         pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
         pDataArray->m->openOutputFile(pDataArray->outputScrapFasta, outScrapFasta);
         
-        if (pDataArray->threadID == 0) {  outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  }
+        outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
         
         TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes);
         
@@ -210,12 +223,27 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                 rQual = new QualityScores(inRQual); pDataArray->m->gobble(inRQual);
             }
             
+            Sequence findexBarcode("findex", "NONE");  Sequence rindexBarcode("rindex", "NONE");
+            if (thisfindexfile != "") {
+                Sequence temp(inFIndex); pDataArray->m->gobble(inFIndex);
+                findexBarcode.setAligned(temp.getAligned());
+            }
+            
+            if (thisrindexfile != "") {
+                Sequence temp(inRIndex); pDataArray->m->gobble(inRIndex);
+                rindexBarcode.setAligned(temp.getAligned());
+            }
+
             int barcodeIndex = 0;
             int primerIndex = 0;
             
             if(pDataArray->barcodes.size() != 0){
                 if (thisfqualfile != "") {
-                    success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
+                    if ((thisfindexfile != "") || (thisrindexfile != "")) {
+                        success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
+                    }else {
+                        success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
+                    }
                 }else {
                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
                 }
@@ -315,7 +343,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
             
             if(trashCode.length() == 0){
                 bool ignore = false;
-                if (pDataArray->createGroup) {
+                if (pDataArray->createOligosGroup) {
                     if(pDataArray->barcodes.size() != 0){
                         string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
                         if (pDataArray->primers.size() != 0) { 
@@ -339,7 +367,17 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                             else { pDataArray->groupCounts[it->first] ++; }
                         }else { ignore = true; }
                     }
+                }else if (pDataArray->createFileGroup) {
+                    int pos = pDataArray->group.find("ignore");
+                    if (pos == string::npos) {
+                        pDataArray->groupMap[fSeq.getName()] = pDataArray->group;
+                        
+                        map<string, int>::iterator it = pDataArray->groupCounts.find(pDataArray->group);
+                        if (it == pDataArray->groupCounts.end()) {     pDataArray->groupCounts[pDataArray->group] = 1; }
+                        else { pDataArray->groupCounts[it->first]++; }
+                    }else { ignore = true; }
                 }
+
                 
                 if(pDataArray->allFiles && !ignore){
                     ofstream output;