git.donarmstrong.com Git - mothur.git/blobdiff - makecontigscommand.h
added dups parameter to chimera.uchime. working on make.contigs command.
[mothur.git] / makecontigscommand.h
index 779d35cb284449293a53b60d8eed83755d08ffc2..84e43c01c9a0b8c7775c6a46494a6fc0fcb642a6 100644 (file)
@@ -17,7 +17,7 @@
 #include "needlemanoverlap.hpp"
 #include "blastalign.hpp"
 #include "noalign.hpp"
-
+#include "trimoligos.h"
 
 struct fastqRead {
        vector<int> scores;
@@ -41,7 +41,8 @@ public:
     string getCommandName()                    { return "make.contigs";                        }
     string getCommandCategory()                { return "Sequence Processing";         } 
     //command category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
-    string getHelpString();    
+    string getOutputFileNameTag(string, string);
+       string getHelpString(); 
     string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; }
     string getDescription()            { return "description"; }
     
@@ -49,17 +50,31 @@ public:
     void help() { m->mothurOut(getHelpString()); }     
     
 private:
-    bool abort;
-    string outputDir, ffastqfile, rfastqfile, align;
+    bool abort, allFiles;
+    string outputDir, ffastqfile, rfastqfile, align, oligosfile;
        float match, misMatch, gapOpen, gapExtend;
-       int processors, longestBase;
+       int processors, longestBase, threshold, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
     vector<string> outputNames;
     
+    map<int, oligosPair> barcodes;
+       map<int, oligosPair> primers;
+    vector<string>  linker;
+    vector<string>  spacer;
+       vector<string> primerNameVector;        
+       vector<string> barcodeNameVector;       
+    
+       map<string, int> groupCounts;  
+    //map<string, int> combos;
+       //map<string, int> groupToIndex;
+    //vector<string> groupVector;
+    
     fastqRead readFastq(ifstream&);
     vector< vector<string> > readFastqFiles(int&);
     bool checkReads(fastqRead&, fastqRead&);
     int createProcesses(vector< vector<string> >, string, string, string);
     int driver(vector<string>, string, string, string);
+    bool getOligos(vector<vector<string> >&, vector<vector<string> >&);
+    string reverseOligo(string);
 };
 
 /**************************************************************************************************/
@@ -76,10 +91,10 @@ struct contigsData {
     vector<string> files;
        MothurOut* m;
        float match, misMatch, gapOpen, gapExtend;
-       int count, threadID;
+       int count, threshold, threadID;
        
        contigsData(){}
-       contigsData(vector<string> f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int tid) {
+       contigsData(vector<string> f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int tid) {
         files = f;
                outputFasta = of;
         outputQual = oq;
@@ -89,6 +104,7 @@ struct contigsData {
                misMatch = misMa;
                gapOpen = gapO; 
                gapExtend = gapE; 
+        threshold = thr;
                align = al;
                count = 0;
                threadID = tid;
@@ -165,12 +181,16 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                     contig += seq1[i];
                     contigScores.push_back(scores1[ABaseMap[i]]);
                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
-                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2
-                    contig += seq2[i];
-                    contigScores.push_back(scores2[BBaseMap[i]]);
-                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1
-                    contig += seq1[i];
-                    contigScores.push_back(scores1[ABaseMap[i]]);
+                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
+                    if (scores2[BBaseMap[i]] >= pDataArray->threshold) {
+                        contig += seq2[i];
+                        contigScores.push_back(scores2[BBaseMap[i]]);
+                    }
+                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
+                    if (scores1[ABaseMap[i]] >= pDataArray->threshold) { 
+                        contig += seq1[i];
+                        contigScores.push_back(scores1[ABaseMap[i]]);
+                    }
                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
                     char c = seq1[i];
                     contigScores.push_back(scores1[ABaseMap[i]]);