]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
fixed how seq.dists divides the matrix for multiple processors
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 countends = globaldata->getCountEnds();
24                 convert(globaldata->getProcessors(), processors);
25                 convert(globaldata->getCutOff(), cutoff);
26                 phylip = globaldata->getPhylipFile();
27                 
28                 //open file
29                 string filename = globaldata->getFastaFile();
30                 openInputFile(filename, in);
31                 
32
33                 
34                 int i;
35                 if (isTrue(countends) == true) {
36                         for (i=0; i<globaldata->Estimators.size(); i++) {
37                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
38                                         if (globaldata->Estimators[i] == "nogaps") { 
39                                                 distCalculator = new ignoreGaps();
40                                         }else if (globaldata->Estimators[i] == "eachgap") { 
41                                                 distCalculator = new eachGapDist();     
42                                         }else if (globaldata->Estimators[i] == "onegap") {
43                                         distCalculator = new oneGapDist();                                      }
44                                 }
45                         }
46                 }else {
47                         for (i=0; i<globaldata->Estimators.size(); i++) {
48                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
49                                         if (globaldata->Estimators[i] == "nogaps") { 
50                                                 distCalculator = new ignoreGaps();      
51                                         }else if (globaldata->Estimators[i] == "eachgap") { 
52                                                 distCalculator = new eachGapIgnoreTermGapDist();
53                                         }else if (globaldata->Estimators[i] == "onegap") { 
54                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
55                                         }
56                                 }
57                         }
58                 }
59                 
60                 //reset calc for next command
61                 globaldata->setCalc("");
62         }
63         catch(exception& e) {
64                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
65                 exit(1);
66         }
67         catch(...) {
68                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69                 exit(1);
70         }       
71 }
72 //**********************************************************************************************************************
73
74 int DistanceCommand::execute(){
75         try {
76                 
77                 //reads fasta file and fills sequenceDB
78                 if(globaldata->getFastaFile() != "") {  seqDB = new SequenceDB(in);  }
79                 else { cout << "Error no fasta file." << endl; return 0; }
80                                 
81                 int numSeqs = seqDB->getNumSeqs();
82                 cutoff += 0.005;
83                 
84                 string phylipFile = "";
85                 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
86                 remove(distFile.c_str());
87                 
88                 //doses the user want the phylip formatted file as well
89                 if (isTrue(phylip) == true) {
90                         phylipFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
91                         remove(phylipFile.c_str());
92                         
93                         //output numSeqs to phylip formatted dist file
94                         openOutputFile(phylipFile, phylipOut);
95                         phylipOut << numSeqs << endl;
96                         phylipOut.close();
97                 }
98                 
99                                 
100                 //#     if defined (_WIN32)
101                 //figure out how to implement the fork and wait commands in windows
102                 //      driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
103                 //#     endif
104                 
105                                 
106 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107                 //if you don't need to fork anything
108                 if(processors == 1){
109                         driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
110                         
111                         if (isTrue(phylip) == true) {
112                                 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
113                                 remove((phylipFile + "tempPhylipA").c_str());
114                         }
115                 }else{ //you have multiple processors
116                         
117                         for (int i = 0; i < processors; i++) {
118                                 lines.push_back(new linePair());
119                                 lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
120                                 lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
121                         }
122
123                         cout << lines[0]->start << '\t' << lines[0]->end << endl;
124                         cout << lines[1]->start << '\t' << lines[1]->end << endl;
125
126                         createProcesses(distFile, phylipFile); 
127                 
128                         //append and remove temp files
129                         for (it = processIDS.begin(); it != processIDS.end(); it++) {
130                                 appendFiles((distFile + toString(it->second) + ".temp"), distFile);
131                                 remove((distFile + toString(it->second) + ".temp").c_str());
132                                 
133                                 if (isTrue(phylip) == true) {
134                                         appendFiles((phylipFile + toString(it->second) + ".temp"), phylipFile);
135                                         remove((phylipFile + toString(it->second) + ".temp").c_str());
136                                 }
137                         }
138                 }
139 #else
140                 driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
141                 
142                 if (isTrue(phylip) = true) {
143                         appendFiles((phylipFile + "tempPhylipA"), phylipFile);  
144                         remove((phylipFile + "tempPhylipA").c_str());
145                 }
146 #endif
147                 
148                 delete distCalculator;
149                 
150                 return 0;
151                 
152         }
153         catch(exception& e) {
154                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
155                 exit(1);
156         }
157         catch(...) {
158                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
159                 exit(1);
160         }       
161 }
162 /**************************************************************************************************/
163 void DistanceCommand::createProcesses(string column, string phylip) {
164         try {
165                 int process = 0;
166                 processIDS.clear();
167                 
168                 //loop through and create all the processes you want
169                 while (process != processors) {
170                         int pid = fork();
171                         
172                         if (pid > 0) {
173                                 processIDS[lines[process]->end] = pid;  //create map from line number to pid so you can append files in correct order later
174                                 process++;
175                         }else if (pid == 0){
176                                 driver(distCalculator, seqDB, lines[process]->start, lines[process]->end, column + toString(getpid()) + ".temp", phylip + toString(getpid()) + ".temp", cutoff);
177                                 exit(0);
178                         }else { cout << "unable to spawn the necessary processes." << endl; exit(0); }
179                 }
180         
181                 //force parent to wait until all the processes are done
182                 for (it = processIDS.begin(); it != processIDS.end(); it++) { 
183                         int temp = it->second;
184                         wait(&temp);
185                 }
186                 
187         }
188         catch(exception& e) {
189                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
190                 exit(1);
191         }
192         catch(...) {
193                 cout << "An unknown error has occurred in the DistanceCommand class function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
194                 exit(1);
195         }       
196 }
197
198 /**************************************************************************************************/
199 /////// need to fix to work with calcs and sequencedb
200 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, string pFilename, float cutoff){
201         try {
202
203                 int startTime = time(NULL);
204                 
205                 //column file
206                 ofstream distFile(dFileName.c_str(), ios::trunc);
207                 distFile.setf(ios::fixed, ios::showpoint);
208                 distFile << setprecision(4);
209                 
210                 ofstream philFile(pFilename.c_str(), ios::trunc);
211                 philFile.setf(ios::fixed, ios::showpoint);
212                 philFile << setprecision(4);
213                 
214                 for(int i=startLine;i<endLine;i++){
215                         
216                         for(int j=0;j<i;j++){
217                                 distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
218                                 double dist = distCalculator->getDist();
219                                 
220                                 if(dist <= cutoff){
221                                         distFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl;
222                                 }
223                                 if (isTrue(phylip) == true) {  philFile << dist << '\t'; }
224                                 
225                         }
226                         
227                         if (isTrue(phylip) == true) { philFile << endl; }
228                         
229                         if(i % 100 == 0){
230                                 cout << i << '\t' << time(NULL) - startTime << endl;
231                         }
232                         
233                 }
234                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
235                 
236                 if (isTrue(phylip) != true) {  remove(pFilename.c_str());  }
237                 
238                 //philFile.close();
239                 //distFile.close();
240                 
241                 return 1;
242         }
243         catch(exception& e) {
244                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
245                 exit(1);
246         }
247         catch(...) {
248                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249                 exit(1);
250         }       
251         
252 }
253
254 /**************************************************************************************************/
255 void DistanceCommand::appendFiles(string temp, string filename) {
256         try{
257                 ofstream output;
258                 ifstream input;
259         
260                 //open output file in append mode
261                 openOutputFileAppend(filename, output);
262                 
263                 //open temp file for reading
264                 openInputFile(temp, input);
265                 
266                 string line;
267                 //read input file and write to output file
268                 while(input.eof() != true) {
269                         getline(input, line); //getline removes the newline char
270                         if (line != "") {
271                                 output << line << endl;   // Appending back newline char 
272                         }
273                 }       
274                 
275                 input.close();
276                 output.close();
277         }
278         catch(exception& e) {
279                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
280                 exit(1);
281         }
282         catch(...) {
283                 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
284                 exit(1);
285         }       
286 }
287 /**************************************************************************************************/