1 #ifndef PAIRWISESEQSCOMMAND_H
2 #define PAIRWISESEQSCOMMAND_H
5 * pairwiseseqscommand.h
8 * Created by westcott on 10/20/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "database.hpp"
16 #include "alignment.hpp"
17 #include "validcalculator.h"
19 #include "sequencedb.h"
20 #include "sequence.hpp"
22 #include "gotohoverlap.hpp"
23 #include "needlemanoverlap.hpp"
24 #include "blastalign.hpp"
25 #include "noalign.hpp"
27 #include "ignoregaps.h"
28 #include "eachgapdist.h"
29 #include "eachgapignore.h"
30 #include "onegapdist.h"
31 #include "onegapignore.h"
33 class PairwiseSeqsCommand : public Command {
36 PairwiseSeqsCommand(string);
37 PairwiseSeqsCommand();
38 ~PairwiseSeqsCommand() {}
40 vector<string> setParameters();
41 string getCommandName() { return "pairwise.seqs"; }
42 string getCommandCategory() { return "Sequence Processing"; }
44 string getHelpString();
45 string getOutputPattern(string);
46 string getCitation() { return "Needleman SB, Wunsch CD (1970). A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48: 443-53. [ for needleman ]\nGotoh O (1982). An improved algorithm for matching biological sequences. J Mol Biol 162: 705-8. [ for gotoh ] \nhttp://www.mothur.org/wiki/Pairwise.seqs"; }
47 string getDescription() { return "calculates pairwise distances from an unaligned fasta file"; }
50 void help() { m->mothurOut(getHelpString()); }
58 vector<int> processIDS; //end line, processid
59 vector<distlinePair> lines;
63 void createProcesses(string);
64 int driver(int, int, string, float);
65 int driver(int, int, string, string);
68 int driverMPI(int, int, MPI_File&, float);
69 int driverMPI(int, int, string, unsigned long long&);
70 int driverMPI(int, int, string, unsigned long long&, string);
73 string fastaFileName, align, calc, outputDir, output;
74 float match, misMatch, gapOpen, gapExtend, cutoff;
75 int processors, longestBase;
76 vector<string> fastaFileNames, Estimators;
77 vector<string> outputNames;
79 bool abort, countends, compress;
82 /**************************************************************************************************/
83 //custom data structure for threads to use.
84 // This is passed by void pointer so it can be any data type
85 // that can be passed using a single void pointer (LPVOID).
87 string outputFileName;
88 string align, square, distcalcType, output;
89 unsigned long long start;
90 unsigned long long end;
92 float match, misMatch, gapOpen, gapExtend, cutoff;
93 int count, threadID, longestBase;
98 pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, float cu, int tid) {
120 /**************************************************************************************************/
121 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
123 static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){
124 pairwiseData* pDataArray;
125 pDataArray = (pairwiseData*)lpParam;
128 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
129 outFile.setf(ios::fixed, ios::showpoint);
130 outFile << setprecision(4);
132 pDataArray->count = 0;
134 int startTime = time(NULL);
136 Alignment* alignment;
137 if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
138 else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
139 else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); }
140 else if(pDataArray->align == "noalign") { alignment = new NoAlign(); }
142 pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
143 pDataArray->m->mothurOutEndLine();
144 alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
147 ValidCalculators validCalculator;
148 Dist* distCalculator;
149 if (pDataArray->countends) {
150 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
151 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
152 else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); }
153 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); }
156 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
157 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
158 else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
159 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
163 if(pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
165 for(int i=pDataArray->start;i<pDataArray->end;i++){
168 string name = pDataArray->alignDB.get(i).getName();
169 //pad with spaces to make compatible
170 if (name.length() < 10) { while (name.length() < 10) { name += " "; } }
172 outFile << name << '\t';
174 for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
176 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; }
178 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
179 alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
182 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
183 alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
186 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
187 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
189 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
190 seqI.setAligned(alignment->getSeqAAln());
191 seqJ.setAligned(alignment->getSeqBAln());
193 distCalculator->calcDist(seqI, seqJ);
194 double dist = distCalculator->getDist();
196 if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' + alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }
198 outFile << dist << '\t';
204 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");
208 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n");
212 delete distCalculator;
216 catch(exception& e) {
217 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction");
222 /**************************************************************************************************/
223 static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){
224 pairwiseData* pDataArray;
225 pDataArray = (pairwiseData*)lpParam;
228 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
229 outFile.setf(ios::fixed, ios::showpoint);
230 outFile << setprecision(4);
232 int startTime = time(NULL);
234 Alignment* alignment;
235 if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
236 else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
237 else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); }
238 else if(pDataArray->align == "noalign") { alignment = new NoAlign(); }
240 pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
241 pDataArray->m->mothurOutEndLine();
242 alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
245 ValidCalculators validCalculator;
246 Dist* distCalculator;
247 if (pDataArray->countends) {
248 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
249 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
250 else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); }
251 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); }
254 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
255 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
256 else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
257 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
261 if((pDataArray->output == "lt") && pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
263 pDataArray->count = 0;
264 for(int i=pDataArray->start;i<pDataArray->end;i++){
267 if(pDataArray->output == "lt") {
268 string name = pDataArray->alignDB.get(i).getName();
269 if (name.length() < 10) { //pad with spaces to make compatible
270 while (name.length() < 10) { name += " "; }
272 outFile << name << '\t';
276 for(int j=0;j<i;j++){
278 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; }
280 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
281 alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
284 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
285 alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
288 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
289 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
291 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
292 seqI.setAligned(alignment->getSeqAAln());
293 seqJ.setAligned(alignment->getSeqBAln());
295 distCalculator->calcDist(seqI, seqJ);
296 double dist = distCalculator->getDist();
298 if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' + alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }
300 if(dist <= pDataArray->cutoff){
301 if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
303 if (pDataArray->output == "lt") { outFile << dist << '\t'; }
306 if (pDataArray->output == "lt") { outFile << endl; }
309 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");
313 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)+"\n");
317 delete distCalculator;
321 catch(exception& e) {
322 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction");