git.donarmstrong.com Git - mothur.git/blob - distancecommand.h
working on pam
[mothur.git] / distancecommand.h
1 #ifndef DISTANCECOMMAND_H
2 #define DISTANCECOMMAND_H
3
4 /*
5  *  distancecommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 5/7/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "validcalculator.h"
16 #include "dist.h"
17 #include "sequencedb.h"
18 #include "ignoregaps.h"
19 #include "eachgapdist.h"
20 #include "eachgapignore.h"
21 #include "onegapdist.h"
22 #include "onegapignore.h"
23
24 //custom data structure for threads to use.
25 // This is passed by void pointer so it can be any data type
26 // that can be passed using a single void pointer (LPVOID).
27 struct distanceData {
28         int startLine;
29         int endLine;
30         string dFileName;
31         float cutoff;
32         SequenceDB alignDB;
33         vector<string> Estimators;
34         MothurOut* m;
35         string output;
36         int numNewFasta, count;
37         string countends;
38         
39         distanceData(){}
40         distanceData(int s, int e, string dbname, float c, SequenceDB db, vector<string> Est, MothurOut* mout, string o, int num, string count) {
41                 startLine = s;
42                 endLine = e;
43                 dFileName = dbname;
44                 cutoff = c;
45                 alignDB = db;
46                 Estimators = Est;
47                 m = mout;
48                 output = o;
49                 numNewFasta = num;
50                 countends = count;
51                 
52         }
53 };
54
55 /**************************************************************************************************/
56 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
57 #else
58 static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){ 
59         distanceData* pDataArray;
60         pDataArray = (distanceData*)lpParam;
61         
62         try {
63                 ValidCalculators validCalculator;
64                 Dist* distCalculator;
65                 if (pDataArray->m->isTrue(pDataArray->countends) == true) {
66                         for (int i=0; i<pDataArray->Estimators.size(); i++) {
67                                 if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { 
68                                         if (pDataArray->Estimators[i] == "nogaps")                      {       distCalculator = new ignoreGaps();      }
69                                         else if (pDataArray->Estimators[i] == "eachgap")        {       distCalculator = new eachGapDist();     }
70                                         else if (pDataArray->Estimators[i] == "onegap")         {       distCalculator = new oneGapDist();      }
71                                 }
72                         }
73                 }else {
74                         for (int i=0; i<pDataArray->Estimators.size(); i++) {
75                                 if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { 
76                                         if (pDataArray->Estimators[i] == "nogaps")              {       distCalculator = new ignoreGaps();                                      }
77                                         else if (pDataArray->Estimators[i] == "eachgap"){       distCalculator = new eachGapIgnoreTermGapDist();        }
78                                         else if (pDataArray->Estimators[i] == "onegap") {       distCalculator = new oneGapIgnoreTermGapDist();         }
79                                 }
80                         }
81                 }
82                 
83                 int startTime = time(NULL);
84                 
85                 //column file
86                 ofstream outFile(pDataArray->dFileName.c_str(), ios::trunc);
87                 outFile.setf(ios::fixed, ios::showpoint);
88                 outFile << setprecision(4);
89                 pDataArray->count = 0;
90                 
91                 if (pDataArray->output != "square") { 
92                         if((pDataArray->output == "lt") && (pDataArray->startLine == 0)){       outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
93                         
94                         for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
95                                 if(pDataArray->output == "lt")  {       
96                                         string name = pDataArray->alignDB.get(i).getName();
97                                         if (name.length() < 10) { //pad with spaces to make compatible
98                                                 while (name.length() < 10) {  name += " ";  }
99                                         }
100                                         outFile << name << '\t';        
101                                 }
102                                 for(int j=0;j<i;j++){
103                                         
104                                         if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
105                                         
106                                         //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
107                                         //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
108                                         if ((i >= pDataArray->numNewFasta) && (j >= pDataArray->numNewFasta)) { break; }
109                                         
110                                         distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
111                                         double dist = distCalculator->getDist();
112                                         
113                                         if(dist <= pDataArray->cutoff){
114                                                 if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
115                                         }
116                                         if (pDataArray->output == "lt") {  outFile << dist << '\t'; }
117                                 }
118                                 
119                                 if (pDataArray->output == "lt") { outFile << endl; }
120                                 
121                                 if(i % 100 == 0){
122                                         pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n");                               }
123                                 pDataArray->count++;
124                         }
125                         pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n");
126                 }else{
127                         if(pDataArray->startLine == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
128                         
129                         for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
130                                 
131                                 string name = pDataArray->alignDB.get(i).getName();
132                                 //pad with spaces to make compatible
133                                 if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
134                                 
135                                 outFile << name << '\t';        
136                                 
137                                 for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
138                                         
139                                         if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
140                                         
141                                         distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
142                                         double dist = distCalculator->getDist();
143                                         
144                                         outFile << dist << '\t'; 
145                                 }
146                                 
147                                 outFile << endl; 
148                                 
149                                 if(i % 100 == 0){
150                                         pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
151                                 }
152                                 pDataArray->count++;
153                         }
154                         pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n"); 
155                 }
156                 
157                 outFile.close();
158                 delete distCalculator;
159                 
160                 return 0; 
161         }
162         catch(exception& e) {
163                 pDataArray->m->errorOut(e, "DistanceCommand", "MyDistThreadFunction");
164                 exit(1);
165         }
166
167 #endif
168
169 /**************************************************************************************************/
170 class DistanceCommand : public Command {
171
172 public:
173         DistanceCommand(string);
174         DistanceCommand();
175         ~DistanceCommand() {}
176         
177         vector<string> setParameters();
178         string getCommandName()                 { return "dist.seqs";                   }
179         string getCommandCategory()             { return "Sequence Processing"; }
180         
181         string getHelpString(); 
182     string getOutputPattern(string);    
183         string getCitation() { return "Schloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. PLoS Comput Biol 6: e1000844. \nhttp://www.mothur.org/wiki/Dist.seqs"; }
184         string getDescription()         { return "calculate the pairwaise distances between aligned sequences"; }
185
186         int execute(); 
187         void help() { m->mothurOut(getHelpString()); }  
188         
189         
190 private:
191         struct distlinePair {
192                 int start;
193                 int end;
194                 
195         };
196         
197         //Dist* distCalculator;
198         SequenceDB alignDB;
199
200         string countends, output, fastafile, calc, outputDir, oldfastafile, column, compress;
201
202         int processors, numNewFasta;
203         float cutoff;
204         vector<int> processIDS;   //end line, processid
205         vector<distlinePair> lines;
206         
207         bool abort;
208         vector<string>  Estimators, outputNames; //holds estimators to be used
209         
210         //void m->appendFiles(string, string);
211         void createProcesses(string);
212         int driver(/*Dist*, SequenceDB, */int, int, string, float);
213         int driver(int, int, string, string);
214         
215         #ifdef USE_MPI 
216         int driverMPI(int, int, MPI_File&, float);
217         int driverMPI(int, int, string, unsigned long long&);
218         int driverMPI(int, int, string, unsigned long long&, string);
219         #endif
220         
221         //int convertMatrix(string);
222         bool sanityCheck();
223         //int convertToLowerTriangle(string);
224
225 };
226
227 #endif
228
229 /**************************************************************************************************/
230
231
232