1 #ifndef DISTANCECOMMAND_H
2 #define DISTANCECOMMAND_H
8 * Created by Sarah Westcott on 5/7/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
14 #include "command.hpp"
15 #include "validcalculator.h"
17 #include "sequencedb.h"
18 #include "ignoregaps.h"
19 #include "eachgapdist.h"
20 #include "eachgapignore.h"
21 #include "onegapdist.h"
22 #include "onegapignore.h"
24 //custom data structure for threads to use.
25 // This is passed by void pointer so it can be any data type
26 // that can be passed using a single void pointer (LPVOID).
33 vector<string> Estimators;
36 int numNewFasta, count;
40 distanceData(int s, int e, string dbname, float c, SequenceDB db, vector<string> Est, MothurOut* mout, string o, int num, string count) {
55 /**************************************************************************************************/
56 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
58 static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){
59 distanceData* pDataArray;
60 pDataArray = (distanceData*)lpParam;
63 ValidCalculators validCalculator;
65 if (pDataArray->m->isTrue(pDataArray->countends) == true) {
66 for (int i=0; i<pDataArray->Estimators.size(); i++) {
67 if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) {
68 if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
69 else if (pDataArray->Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
70 else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
74 for (int i=0; i<pDataArray->Estimators.size(); i++) {
75 if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) {
76 if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
77 else if (pDataArray->Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
78 else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
83 int startTime = time(NULL);
86 ofstream outFile(pDataArray->dFileName.c_str(), ios::trunc);
87 outFile.setf(ios::fixed, ios::showpoint);
88 outFile << setprecision(4);
89 pDataArray->count = 0;
91 if (pDataArray->output != "square") {
92 if((pDataArray->output == "lt") && (pDataArray->startLine == 0)){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
94 for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
95 if(pDataArray->output == "lt") {
96 string name = pDataArray->alignDB.get(i).getName();
97 if (name.length() < 10) { //pad with spaces to make compatible
98 while (name.length() < 10) { name += " "; }
100 outFile << name << '\t';
102 for(int j=0;j<i;j++){
104 if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0; }
106 //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
107 //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
108 if ((i >= pDataArray->numNewFasta) && (j >= pDataArray->numNewFasta)) { break; }
110 distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
111 double dist = distCalculator->getDist();
113 if(dist <= pDataArray->cutoff){
114 if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
116 if (pDataArray->output == "lt") { outFile << dist << '\t'; }
119 if (pDataArray->output == "lt") { outFile << endl; }
122 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); }
125 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n");
127 if(pDataArray->startLine == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
129 for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
131 string name = pDataArray->alignDB.get(i).getName();
132 //pad with spaces to make compatible
133 if (name.length() < 10) { while (name.length() < 10) { name += " "; } }
135 outFile << name << '\t';
137 for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
139 if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0; }
141 distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
142 double dist = distCalculator->getDist();
144 outFile << dist << '\t';
150 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");
154 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n");
158 delete distCalculator;
162 catch(exception& e) {
163 pDataArray->m->errorOut(e, "DistanceCommand", "MyDistThreadFunction");
169 /**************************************************************************************************/
170 class DistanceCommand : public Command {
173 DistanceCommand(string);
175 ~DistanceCommand() {}
177 vector<string> setParameters();
178 string getCommandName() { return "dist.seqs"; }
179 string getCommandCategory() { return "Sequence Processing"; }
181 string getHelpString();
182 string getOutputPattern(string);
183 string getCitation() { return "Schloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. PLoS Comput Biol 6: e1000844. \nhttp://www.mothur.org/wiki/Dist.seqs"; }
184 string getDescription() { return "calculate the pairwaise distances between aligned sequences"; }
187 void help() { m->mothurOut(getHelpString()); }
191 struct distlinePair {
197 //Dist* distCalculator;
200 string countends, output, fastafile, calc, outputDir, oldfastafile, column, compress;
202 int processors, numNewFasta;
204 vector<int> processIDS; //end line, processid
205 vector<distlinePair> lines;
208 vector<string> Estimators, outputNames; //holds estimators to be used
210 //void m->appendFiles(string, string);
211 void createProcesses(string);
212 int driver(/*Dist*, SequenceDB, */int, int, string, float);
213 int driver(int, int, string, string);
216 int driverMPI(int, int, MPI_File&, float);
217 int driverMPI(int, int, string, unsigned long long&);
218 int driverMPI(int, int, string, unsigned long long&, string);
221 //int convertMatrix(string);
223 //int convertToLowerTriangle(string);
229 /**************************************************************************************************/