]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
dist.seqs can now use n processors, and only outputs the phylip formatted distance...
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 countends = globaldata->getCountEnds();
24                 convert(globaldata->getProcessors(), processors);
25                 convert(globaldata->getCutOff(), cutoff);
26                 phylip = globaldata->getPhylipFile();
27                 
28                 //open file
29                 string filename = globaldata->getFastaFile();
30                 openInputFile(filename, in);
31                 
32
33                 
34                 int i;
35                 if (isTrue(countends) == true) {
36                         for (i=0; i<globaldata->Estimators.size(); i++) {
37                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
38                                         if (globaldata->Estimators[i] == "nogaps") { 
39                                                 distCalculator = new ignoreGaps();
40                                         }else if (globaldata->Estimators[i] == "eachgap") { 
41                                                 distCalculator = new eachGapDist();     
42                                         }else if (globaldata->Estimators[i] == "onegap") {
43                                         distCalculator = new oneGapDist();                                      }
44                                 }
45                         }
46                 }else {
47                         for (i=0; i<globaldata->Estimators.size(); i++) {
48                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
49                                         if (globaldata->Estimators[i] == "nogaps") { 
50                                                 distCalculator = new ignoreGaps();      
51                                         }else if (globaldata->Estimators[i] == "eachgap") { 
52                                                 distCalculator = new eachGapIgnoreTermGapDist();
53                                         }else if (globaldata->Estimators[i] == "onegap") { 
54                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
55                                         }
56                                 }
57                         }
58                 }
59                 
60                 //reset calc for next command
61                 globaldata->setCalc("");
62         }
63         catch(exception& e) {
64                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
65                 exit(1);
66         }
67         catch(...) {
68                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69                 exit(1);
70         }       
71 }
72 //**********************************************************************************************************************
73
74 int DistanceCommand::execute(){
75         try {
76                 
77                 //reads fasta file and fills sequenceDB
78                 if(globaldata->getFastaFile() != "") {  seqDB = new SequenceDB(in);  }
79                 else { cout << "Error no fasta file." << endl; return 0; }
80                                 
81                 int numSeqs = seqDB->getNumSeqs();
82                 cutoff += 0.005;
83                 
84                 string phylipFile = "";
85                 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
86                 remove(distFile.c_str());
87                 
88                 //doses the user want the phylip formatted file as well
89                 if (isTrue(phylip) == true) {
90                         phylipFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
91                         remove(phylipFile.c_str());
92                         
93                         //output numSeqs to phylip formatted dist file
94                         openOutputFile(phylipFile, phylipOut);
95                         phylipOut << numSeqs << endl;
96                         phylipOut.close();
97                 }
98                 
99                                 
100                 //#     if defined (_WIN32)
101                 //figure out how to implement the fork and wait commands in windows
102                 //      driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
103                 //#     endif
104                 
105                                 
106 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107                 //if you don't need to fork anything
108                 if(processors == 1){
109                         driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
110                         
111                         if (isTrue(phylip) == true) {
112                                 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
113                                 remove((phylipFile + "tempPhylipA").c_str());
114                         }
115                 }else{ //you have multiple processors
116                         
117                         //create line pairs
118                         int numPerGroup = numSeqs / processors;
119                         int remainder = numSeqs % processors;
120                         
121                         for (int i = 0; i < processors; i++) {
122                                 lines.push_back(new linePair());
123                                 lines[i]->start = i*numPerGroup;
124                                 lines[i]->end = (i+1)*numPerGroup;
125                         }
126                         //give the last one any extra line
127                         lines[lines.size()-1]->end += remainder;
128                         
129                         createProcesses(distFile, phylipFile); 
130                 
131                         //append and remove temp files
132                         for (it = processIDS.begin(); it != processIDS.end(); it++) {
133                                 appendFiles((distFile + toString(it->second) + ".temp"), distFile);
134                                 remove((distFile + toString(it->second) + ".temp").c_str());
135                                 
136                                 if (isTrue(phylip) == true) {
137                                         appendFiles((phylipFile + toString(it->second) + ".temp"), phylipFile);
138                                         remove((phylipFile + toString(it->second) + ".temp").c_str());
139                                 }
140                         }
141                 }
142 #else
143                 driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
144                 
145                 if (isTrue(phylip) = true) {
146                         appendFiles((phylipFile + "tempPhylipA"), phylipFile);  
147                         remove((phylipFile + "tempPhylipA").c_str());
148                 }
149 #endif
150                 
151                 delete distCalculator;
152                 
153                 return 0;
154                 
155         }
156         catch(exception& e) {
157                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
158                 exit(1);
159         }
160         catch(...) {
161                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
162                 exit(1);
163         }       
164 }
165 /**************************************************************************************************/
166 void DistanceCommand::createProcesses(string column, string phylip) {
167         try {
168                 int process = 0;
169                 processIDS.clear();
170                 
171                 //loop through and create all the processes you want
172                 while (process != processors) {
173                         int pid = fork();
174                         
175                         if (pid > 0) {
176                                 processIDS[lines[process]->end] = pid;  //create map from line number to pid so you can append files in correct order later
177                                 process++;
178                         }else if (pid == 0){
179                                 driver(distCalculator, seqDB, lines[process]->start, lines[process]->end, column + toString(getpid()) + ".temp", phylip + toString(getpid()) + ".temp", cutoff);
180                                 exit(0);
181                         }else { cout << "unable to spawn the necessary processes." << endl; exit(0); }
182                 }
183         
184                 //force parent to wait until all the processes are done
185                 for (it = processIDS.begin(); it != processIDS.end(); it++) { 
186                         int temp = it->second;
187                         wait(&temp);
188                 }
189                 
190         }
191         catch(exception& e) {
192                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
193                 exit(1);
194         }
195         catch(...) {
196                 cout << "An unknown error has occurred in the DistanceCommand class function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
197                 exit(1);
198         }       
199 }
200
201 /**************************************************************************************************/
202 /////// need to fix to work with calcs and sequencedb
203 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, string pFilename, float cutoff){
204         try {
205
206                 int startTime = time(NULL);
207                 
208                 //column file
209                 ofstream distFile(dFileName.c_str(), ios::trunc);
210                 distFile.setf(ios::fixed, ios::showpoint);
211                 distFile << setprecision(4);
212                 
213                 ofstream philFile(pFilename.c_str(), ios::trunc);
214                 philFile.setf(ios::fixed, ios::showpoint);
215                 philFile << setprecision(4);
216                 
217                 for(int i=startLine;i<endLine;i++){
218                         
219                         for(int j=0;j<i;j++){
220                                 distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
221                                 double dist = distCalculator->getDist();
222                                 
223                                 if(dist <= cutoff){
224                                         distFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl;
225                                 }
226                                 if (isTrue(phylip) == true) {  philFile << dist << '\t'; }
227                                 
228                         }
229                         
230                         if (isTrue(phylip) == true) { philFile << endl; }
231                         
232                         if(i % 100 == 0){
233                                 cout << i << '\t' << time(NULL) - startTime << endl;
234                         }
235                         
236                 }
237                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
238                 
239                 if (isTrue(phylip) != true) {  remove(pFilename.c_str());  }
240                 
241                 //philFile.close();
242                 //distFile.close();
243                 
244                 return 1;
245         }
246         catch(exception& e) {
247                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
248                 exit(1);
249         }
250         catch(...) {
251                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
252                 exit(1);
253         }       
254         
255 }
256
257 /**************************************************************************************************/
258 void DistanceCommand::appendFiles(string temp, string filename) {
259         try{
260                 ofstream output;
261                 ifstream input;
262         
263                 //open output file in append mode
264                 openOutputFileAppend(filename, output);
265                 
266                 //open temp file for reading
267                 openInputFile(temp, input);
268                 
269                 string line;
270                 //read input file and write to output file
271                 while(input.eof() != true) {
272                         getline(input, line); //getline removes the newline char
273                         if (line != "") {
274                                 output << line << endl;   // Appending back newline char 
275                         }
276                 }       
277                 
278                 input.close();
279                 output.close();
280         }
281         catch(exception& e) {
282                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
283                 exit(1);
284         }
285         catch(...) {
286                 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
287                 exit(1);
288         }       
289 }
290 /**************************************************************************************************/