1 #ifndef Mothur_makecontigscommand_h
2 #define Mothur_makecontigscommand_h
5 // makecontigscommand.h
8 // Created by Sarah Westcott on 5/15/12.
9 // Copyright (c) 2012 Schloss Lab. All rights reserved.
12 #include "command.hpp"
13 #include "sequence.hpp"
14 #include "qualityscores.h"
15 #include "alignment.hpp"
16 #include "gotohoverlap.hpp"
17 #include "needlemanoverlap.hpp"
18 #include "blastalign.hpp"
19 #include "noalign.hpp"
20 #include "trimoligos.h"
27 fastqRead() { name = ""; sequence = ""; scores.clear(); };
28 fastqRead(string n, string s, vector<int> sc) : name(n), sequence(s), scores(sc) {};
32 /**************************************************************************************************/
34 class MakeContigsCommand : public Command {
36 MakeContigsCommand(string);
38 ~MakeContigsCommand(){}
40 vector<string> setParameters();
41 string getCommandName() { return "make.contigs"; }
42 string getCommandCategory() { return "Sequence Processing"; }
43 //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
44 string getOutputFileNameTag(string, string);
45 string getHelpString();
46 string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; }
47 string getDescription() { return "description"; }
50 void help() { m->mothurOut(getHelpString()); }
54 string outputDir, ffastqfile, rfastqfile, align, oligosfile;
55 float match, misMatch, gapOpen, gapExtend;
56 int processors, longestBase, threshold, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
57 vector<string> outputNames;
59 map<int, oligosPair> barcodes;
60 map<int, oligosPair> primers;
61 vector<string> linker;
62 vector<string> spacer;
63 vector<string> primerNameVector;
64 vector<string> barcodeNameVector;
66 map<string, int> groupCounts;
67 //map<string, int> combos;
68 //map<string, int> groupToIndex;
69 //vector<string> groupVector;
71 fastqRead readFastq(ifstream&);
72 vector< vector<string> > readFastqFiles(int&);
73 bool checkReads(fastqRead&, fastqRead&);
74 int createProcesses(vector< vector<string> >, string, string, string);
75 int driver(vector<string>, string, string, string);
76 bool getOligos(vector<vector<string> >&, vector<vector<string> >&);
77 string reverseOligo(string);
80 /**************************************************************************************************/
82 /**************************************************************************************************/
83 //custom data structure for threads to use.
84 // This is passed by void pointer so it can be any data type
85 // that can be passed using a single void pointer (LPVOID).
89 string outputMisMatches;
93 float match, misMatch, gapOpen, gapExtend;
94 int count, threshold, threadID;
97 contigsData(vector<string> f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int tid) {
101 outputMisMatches = om;
114 /**************************************************************************************************/
115 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
117 static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
118 contigsData* pDataArray;
119 pDataArray = (contigsData*)lpParam;
122 int longestBase = 1000;
123 Alignment* alignment;
124 if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase); }
125 else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase); }
128 string thisffastafile = pDataArray->files[0];
129 string thisfqualfile = pDataArray->files[1];
130 string thisrfastafile = pDataArray->files[2];
131 string thisrqualfile = pDataArray->files[3];
133 if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
135 ifstream inFFasta, inRFasta, inFQual, inRQual;
136 pDataArray->m->openInputFile(thisffastafile, inFFasta);
137 pDataArray->m->openInputFile(thisfqualfile, inFQual);
138 pDataArray->m->openInputFile(thisrfastafile, inRFasta);
139 pDataArray->m->openInputFile(thisrqualfile, inRQual);
141 ofstream outFasta, outQual, outMisMatch;
142 pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
143 pDataArray->m->openOutputFile(pDataArray->outputQual, outQual);
144 pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
145 outMisMatch << "Name\tLength\tMisMatches\n";
147 while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
149 if (pDataArray->m->control_pressed) { break; }
151 //read seqs and quality info
152 Sequence fSeq(inFFasta); pDataArray->m->gobble(inFFasta);
153 Sequence rSeq(inRFasta); pDataArray->m->gobble(inRFasta);
154 QualityScores fQual(inFQual); pDataArray->m->gobble(inFQual);
155 QualityScores rQual(inRQual); pDataArray->m->gobble(inRQual);
157 //flip the reverse reads
158 rSeq.reverseComplement();
162 alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
163 map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
164 map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
165 fSeq.setAligned(alignment->getSeqAAln());
166 rSeq.setAligned(alignment->getSeqBAln());
167 int length = fSeq.getAligned().length();
169 //traverse alignments merging into one contiguous seq
171 vector<int> contigScores;
172 int numMismatches = 0;
173 string seq1 = fSeq.getAligned();
174 string seq2 = rSeq.getAligned();
176 vector<int> scores1 = fQual.getQualityScores();
177 vector<int> scores2 = rQual.getQualityScores();
179 for (int i = 0; i < length; i++) {
180 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
182 contigScores.push_back(scores1[ABaseMap[i]]);
183 if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
184 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
185 if (scores2[BBaseMap[i]] >= pDataArray->threshold) {
187 contigScores.push_back(scores2[BBaseMap[i]]);
189 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
190 if (scores1[ABaseMap[i]] >= pDataArray->threshold) {
192 contigScores.push_back(scores1[ABaseMap[i]]);
194 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
196 contigScores.push_back(scores1[ABaseMap[i]]);
197 if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; }
200 }else { //should never get here
201 pDataArray->m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
206 outFasta << ">" << fSeq.getName() << endl << contig << endl;
207 outQual << ">" << fSeq.getName() << endl;
208 for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
210 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
215 if((num) % 1000 == 0){ pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine(); }
219 if((num) % 1000 != 0){ pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine(); }
230 if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputFasta); pDataArray->m->mothurRemove(pDataArray->outputMisMatches);}
235 catch(exception& e) {
236 pDataArray->m->errorOut(e, "AlignCommand", "MyContigsThreadFunction");