1 #ifndef PAIRWISESEQSCOMMAND_H
2 #define PAIRWISESEQSCOMMAND_H
5 * pairwiseseqscommand.h
8 * Created by westcott on 10/20/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "database.hpp"
16 #include "alignment.hpp"
17 #include "validcalculator.h"
19 #include "sequencedb.h"
20 #include "sequence.hpp"
22 #include "gotohoverlap.hpp"
23 #include "needlemanoverlap.hpp"
24 #include "blastalign.hpp"
25 #include "noalign.hpp"
27 #include "ignoregaps.h"
28 #include "eachgapdist.h"
29 #include "eachgapignore.h"
30 #include "onegapdist.h"
31 #include "onegapignore.h"
33 class PairwiseSeqsCommand : public Command {
36 PairwiseSeqsCommand(string);
37 PairwiseSeqsCommand();
38 ~PairwiseSeqsCommand() {}
40 vector<string> setParameters();
41 string getCommandName() { return "pairwise.seqs"; }
42 string getCommandCategory() { return "Sequence Processing"; }
43 string getOutputFileNameTag(string, string);
44 string getHelpString();
45 string getCitation() { return "Needleman SB, Wunsch CD (1970). A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48: 443-53. [ for needleman ]\nGotoh O (1982). An improved algorithm for matching biological sequences. J Mol Biol 162: 705-8. [ for gotoh ] \nhttp://www.mothur.org/wiki/Pairwise.seqs"; }
46 string getDescription() { return "calculates pairwise distances from an unaligned fasta file"; }
49 void help() { m->mothurOut(getHelpString()); }
57 vector<int> processIDS; //end line, processid
58 vector<distlinePair> lines;
62 void createProcesses(string);
63 int driver(int, int, string, float);
64 int driver(int, int, string, string);
67 int driverMPI(int, int, MPI_File&, float);
68 int driverMPI(int, int, string, unsigned long long&);
69 int driverMPI(int, int, string, unsigned long long&, string);
72 string fastaFileName, align, calc, outputDir, output;
73 float match, misMatch, gapOpen, gapExtend, cutoff;
74 int processors, longestBase;
75 vector<string> fastaFileNames, Estimators;
76 vector<string> outputNames;
78 bool abort, countends, compress;
81 /**************************************************************************************************/
82 //custom data structure for threads to use.
83 // This is passed by void pointer so it can be any data type
84 // that can be passed using a single void pointer (LPVOID).
86 string outputFileName;
87 string align, square, distcalcType, output;
88 unsigned long long start;
89 unsigned long long end;
91 float match, misMatch, gapOpen, gapExtend, cutoff;
92 int count, threadID, longestBase;
97 pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, int tid) {
118 /**************************************************************************************************/
119 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
121 static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){
122 pairwiseData* pDataArray;
123 pDataArray = (pairwiseData*)lpParam;
126 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
127 outFile.setf(ios::fixed, ios::showpoint);
128 outFile << setprecision(4);
130 pDataArray->count = pDataArray->end;
132 int startTime = time(NULL);
134 Alignment* alignment;
135 if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
136 else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
137 else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); }
138 else if(pDataArray->align == "noalign") { alignment = new NoAlign(); }
140 pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
141 pDataArray->m->mothurOutEndLine();
142 alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
145 ValidCalculators validCalculator;
146 Dist* distCalculator;
147 if (pDataArray->countends) {
148 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
149 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
150 else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); }
151 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); }
154 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
155 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
156 else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
157 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
161 if(pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
163 for(int i=pDataArray->start;i<pDataArray->end;i++){
165 string name = pDataArray->alignDB.get(i).getName();
166 //pad with spaces to make compatible
167 if (name.length() < 10) { while (name.length() < 10) { name += " "; } }
169 outFile << name << '\t';
171 for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
173 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; }
175 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
176 alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
179 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
180 alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
183 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
184 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
186 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
187 seqI.setAligned(alignment->getSeqAAln());
188 seqJ.setAligned(alignment->getSeqBAln());
190 distCalculator->calcDist(seqI, seqJ);
191 double dist = distCalculator->getDist();
193 outFile << dist << '\t';
199 pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
203 pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
207 delete distCalculator;
211 catch(exception& e) {
212 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction");
217 /**************************************************************************************************/
218 static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){
219 pairwiseData* pDataArray;
220 pDataArray = (pairwiseData*)lpParam;
223 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
224 outFile.setf(ios::fixed, ios::showpoint);
225 outFile << setprecision(4);
227 pDataArray->count = pDataArray->end;
229 int startTime = time(NULL);
231 Alignment* alignment;
232 if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
233 else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
234 else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); }
235 else if(pDataArray->align == "noalign") { alignment = new NoAlign(); }
237 pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
238 pDataArray->m->mothurOutEndLine();
239 alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
242 ValidCalculators validCalculator;
243 Dist* distCalculator;
244 if (pDataArray->countends) {
245 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
246 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
247 else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); }
248 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); }
251 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
252 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
253 else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
254 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
258 if((pDataArray->output == "lt") && pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
260 for(int i=pDataArray->start;i<pDataArray->end;i++){
262 if(pDataArray->output == "lt") {
263 string name = pDataArray->alignDB.get(i).getName();
264 if (name.length() < 10) { //pad with spaces to make compatible
265 while (name.length() < 10) { name += " "; }
267 outFile << name << '\t';
271 for(int j=0;j<i;j++){
273 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; }
275 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
276 alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
279 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
280 alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
283 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
284 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
286 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
287 seqI.setAligned(alignment->getSeqAAln());
288 seqJ.setAligned(alignment->getSeqBAln());
290 distCalculator->calcDist(seqI, seqJ);
291 double dist = distCalculator->getDist();
293 if(dist <= pDataArray->cutoff){
294 if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
296 if (pDataArray->output == "lt") { outFile << dist << '\t'; }
299 if (pDataArray->output == "lt") { outFile << endl; }
302 pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
306 pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
310 delete distCalculator;
314 catch(exception& e) {
315 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction");