1 #ifndef PAIRWISESEQSCOMMAND_H
2 #define PAIRWISESEQSCOMMAND_H
5 * pairwiseseqscommand.h
8 * Created by westcott on 10/20/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "database.hpp"
16 #include "alignment.hpp"
17 #include "validcalculator.h"
19 #include "sequencedb.h"
20 #include "sequence.hpp"
22 #include "gotohoverlap.hpp"
23 #include "needlemanoverlap.hpp"
24 #include "blastalign.hpp"
25 #include "noalign.hpp"
27 #include "ignoregaps.h"
28 #include "eachgapdist.h"
29 #include "eachgapignore.h"
30 #include "onegapdist.h"
31 #include "onegapignore.h"
33 class PairwiseSeqsCommand : public Command {
36 PairwiseSeqsCommand(string);
37 PairwiseSeqsCommand();
38 ~PairwiseSeqsCommand() {}
40 vector<string> setParameters();
41 string getCommandName() { return "pairwise.seqs"; }
42 string getCommandCategory() { return "Sequence Processing"; }
44 string getHelpString();
45 string getOutputPattern(string);
46 string getCitation() { return "Needleman SB, Wunsch CD (1970). A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48: 443-53. [ for needleman ]\nGotoh O (1982). An improved algorithm for matching biological sequences. J Mol Biol 162: 705-8. [ for gotoh ] \nhttp://www.mothur.org/wiki/Pairwise.seqs"; }
47 string getDescription() { return "calculates pairwise distances from an unaligned fasta file"; }
50 void help() { m->mothurOut(getHelpString()); }
58 vector<int> processIDS; //end line, processid
59 vector<distlinePair> lines;
63 void createProcesses(string);
64 int driver(int, int, string, float);
65 int driver(int, int, string, string);
68 int driverMPI(int, int, MPI_File&, float);
69 int driverMPI(int, int, string, unsigned long long&);
70 int driverMPI(int, int, string, unsigned long long&, string);
73 string fastaFileName, align, calc, outputDir, output;
74 float match, misMatch, gapOpen, gapExtend, cutoff;
75 int processors, longestBase;
76 vector<string> fastaFileNames, Estimators;
77 vector<string> outputNames;
79 bool abort, countends, compress;
82 /**************************************************************************************************/
83 //custom data structure for threads to use.
84 // This is passed by void pointer so it can be any data type
85 // that can be passed using a single void pointer (LPVOID).
87 string outputFileName;
88 string align, square, distcalcType, output;
89 unsigned long long start;
90 unsigned long long end;
92 float match, misMatch, gapOpen, gapExtend, cutoff;
93 int count, threadID, longestBase;
98 pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, float cu, int tid) {
120 /**************************************************************************************************/
121 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
123 static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){
124 pairwiseData* pDataArray;
125 pDataArray = (pairwiseData*)lpParam;
128 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
129 outFile.setf(ios::fixed, ios::showpoint);
130 outFile << setprecision(4);
132 pDataArray->count = 0;
134 int startTime = time(NULL);
136 Alignment* alignment;
137 if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
138 else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
139 else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); }
140 else if(pDataArray->align == "noalign") { alignment = new NoAlign(); }
142 pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
143 pDataArray->m->mothurOutEndLine();
144 alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
147 ValidCalculators validCalculator;
148 Dist* distCalculator;
149 if (pDataArray->countends) {
150 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
151 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
152 else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); }
153 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); }
156 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
157 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
158 else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
159 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
163 if(pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
165 for(int i=pDataArray->start;i<pDataArray->end;i++){
168 string name = pDataArray->alignDB.get(i).getName();
169 //pad with spaces to make compatible
170 if (name.length() < 10) { while (name.length() < 10) { name += " "; } }
172 outFile << name << '\t';
174 for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
176 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; }
178 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
179 alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
182 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
183 alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
186 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
187 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
189 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
190 seqI.setAligned(alignment->getSeqAAln());
191 seqJ.setAligned(alignment->getSeqBAln());
193 distCalculator->calcDist(seqI, seqJ);
194 double dist = distCalculator->getDist();
196 outFile << dist << '\t';
202 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");
206 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n");
210 delete distCalculator;
214 catch(exception& e) {
215 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction");
220 /**************************************************************************************************/
221 static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){
222 pairwiseData* pDataArray;
223 pDataArray = (pairwiseData*)lpParam;
226 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
227 outFile.setf(ios::fixed, ios::showpoint);
228 outFile << setprecision(4);
230 int startTime = time(NULL);
232 Alignment* alignment;
233 if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
234 else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); }
235 else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); }
236 else if(pDataArray->align == "noalign") { alignment = new NoAlign(); }
238 pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
239 pDataArray->m->mothurOutEndLine();
240 alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
243 ValidCalculators validCalculator;
244 Dist* distCalculator;
245 if (pDataArray->countends) {
246 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
247 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
248 else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); }
249 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); }
252 if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) {
253 if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); }
254 else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
255 else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
259 if((pDataArray->output == "lt") && pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
261 pDataArray->count = 0;
262 for(int i=pDataArray->start;i<pDataArray->end;i++){
265 if(pDataArray->output == "lt") {
266 string name = pDataArray->alignDB.get(i).getName();
267 if (name.length() < 10) { //pad with spaces to make compatible
268 while (name.length() < 10) { name += " "; }
270 outFile << name << '\t';
274 for(int j=0;j<i;j++){
276 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; }
278 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
279 alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
282 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
283 alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
286 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
287 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
289 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
290 seqI.setAligned(alignment->getSeqAAln());
291 seqJ.setAligned(alignment->getSeqBAln());
293 distCalculator->calcDist(seqI, seqJ);
294 double dist = distCalculator->getDist();
296 if(dist <= pDataArray->cutoff){
297 if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
299 if (pDataArray->output == "lt") { outFile << dist << '\t'; }
302 if (pDataArray->output == "lt") { outFile << endl; }
305 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");
309 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)+"\n");
313 delete distCalculator;
317 catch(exception& e) {
318 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction");