]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
fixed some bugs
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 countends = globaldata->getCountEnds();
24                 convert(globaldata->getProcessors(), processors);
25                 convert(globaldata->getCutOff(), cutoff);
26                 
27                 int i;
28                 if (isTrue(countends) == true) {
29                         for (i=0; i<globaldata->Estimators.size(); i++) {
30                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
31                                         if (globaldata->Estimators[i] == "nogaps") { 
32                                                 distCalculator = new ignoreGaps();
33                                         }else if (globaldata->Estimators[i] == "eachgap") { 
34                                                 distCalculator = new eachGapDist();     
35                                         }else if (globaldata->Estimators[i] == "onegap") {
36                                         distCalculator = new oneGapDist();                                      }
37                                 }
38                         }
39                 }else {
40                         for (i=0; i<globaldata->Estimators.size(); i++) {
41                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
42                                         if (globaldata->Estimators[i] == "nogaps") { 
43                                                 distCalculator = new ignoreGaps();      
44                                         }else if (globaldata->Estimators[i] == "eachgap") { 
45                                                 distCalculator = new eachGapIgnoreTermGapDist();
46                                         }else if (globaldata->Estimators[i] == "onegap") { 
47                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
48                                         }
49                                 }
50                         }
51                 }
52                 
53                 //reset calc for next command
54                 globaldata->setCalc("");
55         }
56         catch(exception& e) {
57                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
58                 exit(1);
59         }
60         catch(...) {
61                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
62                 exit(1);
63         }       
64 }
65 //**********************************************************************************************************************
66
67 int DistanceCommand::execute(){
68         try {
69                 
70                 //read file
71                 string filename = globaldata->inputFileName;
72                 
73                 if(globaldata->getFastaFile() != "") {
74                 readSeqs =  new ReadFasta(filename); }
75                 else if(globaldata->getNexusFile() != "") {
76                 readSeqs = new ReadNexus(filename); }
77                 else if(globaldata->getClustalFile() != "") {
78                 readSeqs = new ReadClustal(filename); }
79                 else if(globaldata->getPhylipFile() != "") {
80                 readSeqs = new ReadPhylip(filename); }
81                 
82                 readSeqs->read();
83                 seqDB = readSeqs->getDB();
84                 
85                 int numSeqs = seqDB->getNumSeqs();
86                 cutoff += 0.005;
87                 
88                 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
89                 
90                 remove(distFile.c_str());
91                 
92                 //#     if defined (_WIN32)
93                 //figure out how to implement the fork and wait commands in windows
94                 //      driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
95                 //#     endif
96                 
97 #if defined (__APPLE__) || (__MACH__)
98                 if(processors == 1){
99                         driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);    
100                 }       
101                 else if(processors == 2){
102                         
103                         int pid = fork();
104                         if(pid > 0){
105                                 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);        
106                                 appendFiles((distFile+"tempa"), distFile);
107                                 remove((distFile + "tempa").c_str());   
108                         }
109                         else{
110                                 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);  
111                                 appendFiles((distFile+"tempb"), distFile);
112                                 remove((distFile + "tempb").c_str());   
113                         }
114                         wait(NULL);
115                         
116                 }
117                 else if(processors == 3){
118                         int pid1 = fork();
119                         if(pid1 > 0){
120                                 int pid2 = fork();
121                                 if(pid2 > 0){
122                                         driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
123                                         appendFiles(distFile+"tempa", distFile);
124                                         remove((distFile + "tempa").c_str());   
125                                 }
126                                 else{
127                                         driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);        
128                                         appendFiles(distFile+"tempb", distFile);
129                                         remove((distFile + "tempb").c_str());                           
130                                 }
131                                 wait(NULL);
132                         }
133                         else{
134                                 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);      
135                                 appendFiles(distFile+"tempc", distFile);
136                                 remove((distFile + "tempc").c_str());                   
137                         }
138                         wait(NULL);
139                 }
140                 else if(processors == 4){
141                         int pid1 = fork();
142                         if(pid1 > 0){
143                                 int pid2 = fork();
144                                 if(pid2 > 0){
145                                         driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);      
146                                         appendFiles(distFile+"tempa", distFile);
147                                         remove((distFile + "tempa").c_str());                   
148                                 }
149                                 else{
150                                         driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);      
151                                         appendFiles(distFile+"tempb", distFile);
152                                         remove((distFile + "tempb").c_str());                           
153                                 }
154                                 wait(NULL);
155                         }
156                         else{
157                                 int pid3 = fork();
158                                 if(pid3 > 0){
159                                         driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);  
160                                         appendFiles(distFile+"tempc", distFile);
161                                         remove((distFile + "tempc").c_str());                           
162                                 }
163                                 else{
164                                         driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);    
165                                         appendFiles(distFile+"tempd", distFile);
166                                         remove((distFile + "tempd").c_str());                           
167                                 }
168                                 wait(NULL);
169                         }
170                         wait(NULL);
171                 }
172                 wait(NULL);
173 #elif (linux) || (__linux)
174                 if(processors == 1){
175                         driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);    
176                 }       
177                 else if(processors == 2){
178                         
179                         int pid = fork();
180                         if(pid > 0){
181                                 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);        
182                                 appendFiles((distFile+"tempa"), distFile);
183                                 remove((distFile + "tempa").c_str());   
184                         }
185                         else{
186                                 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);  
187                                 appendFiles((distFile+"tempb"), distFile);
188                                 remove((distFile + "tempb").c_str());   
189                         }
190                         wait();
191                         
192                 }
193                 else if(processors == 3){
194                         int pid1 = fork();
195                         if(pid1 > 0){
196                                 int pid2 = fork();
197                                 if(pid2 > 0){
198                                         driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
199                                         appendFiles(distFile+"tempa", distFile);
200                                         remove((distFile + "tempa").c_str());   
201                                 }
202                                 else{
203                                         driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);        
204                                         appendFiles(distFile+"tempb", distFile);
205                                         remove((distFile + "tempb").c_str());                           
206                                 }
207                                 wait();
208                         }
209                         else{
210                                 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);      
211                                 appendFiles(distFile+"tempc", distFile);
212                                 remove((distFile + "tempc").c_str());                   
213                         }
214                         wait();
215                 }
216                 else if(processors == 4){
217                         int pid1 = fork();
218                         if(pid1 > 0){
219                                 int pid2 = fork();
220                                 if(pid2 > 0){
221                                         driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);      
222                                         appendFiles(distFile+"tempa", distFile);
223                                         remove((distFile + "tempa").c_str());                   
224                                 }
225                                 else{
226                                         driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);      
227                                         appendFiles(distFile+"tempb", distFile);
228                                         remove((distFile + "tempb").c_str());                           
229                                 }
230                                 wait();
231                         }
232                         else{
233                                 int pid3 = fork();
234                                 if(pid3 > 0){
235                                         driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);  
236                                         appendFiles(distFile+"tempc", distFile);
237                                         remove((distFile + "tempc").c_str());                           
238                                 }
239                                 else{
240                                         driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);    
241                                         appendFiles(distFile+"tempd", distFile);
242                                         remove((distFile + "tempd").c_str());                           
243                                 }
244                                 wait();
245                         }
246                         wait();
247                 }
248                 wait();
249                 
250 #else
251                 driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
252 #endif
253                 
254                 delete distCalculator;
255                 
256                 return 0;
257                 
258         }
259         catch(exception& e) {
260                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
261                 exit(1);
262         }
263         catch(...) {
264                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
265                 exit(1);
266         }       
267 }
268
269 /**************************************************************************************************/
270 /////// need to fix to work with calcs and sequencedb
271 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
272         try {
273                 int startTime = time(NULL);
274                 
275                 ofstream distFile(dFileName.c_str(), ios::trunc);
276                 distFile.setf(ios::fixed, ios::showpoint);
277                 distFile << setprecision(4);
278                 
279                 for(int i=startLine;i<endLine;i++){
280                         
281                         for(int j=0;j<i;j++){
282                                 distCalculator->calcDist(align->get(i), align->get(j));
283                                 double dist = distCalculator->getDist();
284                                 
285                                 if(dist <= cutoff){
286                                         distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
287                                 }
288                                 
289                         }
290                         if(i % 100 == 0){
291                                 cout << i << '\t' << time(NULL) - startTime << endl;
292                         }
293                         
294                 }
295                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
296                 
297                 return 1;
298         }
299         catch(exception& e) {
300                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
301                 exit(1);
302         }
303         catch(...) {
304                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
305                 exit(1);
306         }       
307         
308 }
309
310 /**************************************************************************************************/
311 void DistanceCommand::appendFiles(string temp, string filename) {
312         try{
313                 ofstream output;
314                 ifstream input;
315                 
316                 //open output file in append mode
317                 openOutputFileAppend(filename, output);
318                 
319                 //open temp file for reading
320                 openInputFile(temp, input);
321                 
322                 string line;
323                 //read input file and write to output file
324                 while(input.eof() != true) {
325                         getline(input, line); //getline removes the newline char
326                         if (line != "") {
327                                 output << line << endl;   // Appending back newline char 
328                         }
329                 }       
330                 
331                 input.close();
332                 output.close();
333         }
334         catch(exception& e) {
335                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
336                 exit(1);
337         }
338         catch(...) {
339                 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
340                 exit(1);
341         }       
342 }
343 /**************************************************************************************************/