]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
got rid of extra temp files in dist.seqs
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 countends = globaldata->getCountEnds();
24                 convert(globaldata->getProcessors(), processors);
25                 convert(globaldata->getCutOff(), cutoff);
26                 phylip = globaldata->getPhylipFile();
27                 
28                 //open file
29                 string filename = globaldata->getFastaFile();
30                 openInputFile(filename, in);
31                 
32
33                 
34                 int i;
35                 if (isTrue(countends) == true) {
36                         for (i=0; i<globaldata->Estimators.size(); i++) {
37                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
38                                         if (globaldata->Estimators[i] == "nogaps") { 
39                                                 distCalculator = new ignoreGaps();
40                                         }else if (globaldata->Estimators[i] == "eachgap") { 
41                                                 distCalculator = new eachGapDist();     
42                                         }else if (globaldata->Estimators[i] == "onegap") {
43                                         distCalculator = new oneGapDist();                                      }
44                                 }
45                         }
46                 }else {
47                         for (i=0; i<globaldata->Estimators.size(); i++) {
48                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
49                                         if (globaldata->Estimators[i] == "nogaps") { 
50                                                 distCalculator = new ignoreGaps();      
51                                         }else if (globaldata->Estimators[i] == "eachgap") { 
52                                                 distCalculator = new eachGapIgnoreTermGapDist();
53                                         }else if (globaldata->Estimators[i] == "onegap") { 
54                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
55                                         }
56                                 }
57                         }
58                 }
59                 
60                 //reset calc for next command
61                 globaldata->setCalc("");
62         }
63         catch(exception& e) {
64                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
65                 exit(1);
66         }
67         catch(...) {
68                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69                 exit(1);
70         }       
71 }
72 //**********************************************************************************************************************
73
74 int DistanceCommand::execute(){
75         try {
76                 
77                 //reads fasta file and fills sequenceDB
78                 if(globaldata->getFastaFile() != "") {  seqDB = new SequenceDB(in);  }
79                 else { cout << "Error no fasta file." << endl; return 0; }
80                                 
81                 int numSeqs = seqDB->getNumSeqs();
82                 cutoff += 0.005;
83                 
84                 string outputFile;
85                 
86                 //doses the user want the phylip formatted file as well
87                 if (isTrue(phylip) == true) {
88                         outputFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
89                         remove(outputFile.c_str());
90                         
91                         //output numSeqs to phylip formatted dist file
92                         openOutputFile(outputFile, outFile);
93                         outFile << numSeqs << endl;
94                         outFile.close();
95                 }else { //user wants column format
96                         outputFile = getRootName(globaldata->getFastaFile()) + "dist";
97                         remove(outputFile.c_str());
98                 }
99                                 
100                 //#     if defined (_WIN32)
101                 //figure out how to implement the fork and wait commands in windows
102                 //      driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
103                 //#     endif
104                 
105                                 
106 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107                 //if you don't need to fork anything
108                 if(processors == 1){
109                         driver(distCalculator, seqDB, 0, numSeqs, outputFile + ".temp", cutoff);
110                         appendFiles((outputFile + ".temp"), outputFile);
111                         remove((outputFile + ".temp").c_str());
112                 }else{ //you have multiple processors
113                         
114                         for (int i = 0; i < processors; i++) {
115                                 lines.push_back(new linePair());
116                                 lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
117                                 lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
118                         }
119
120                         cout << lines[0]->start << '\t' << lines[0]->end << endl;
121                         cout << lines[1]->start << '\t' << lines[1]->end << endl;
122
123                         createProcesses(outputFile); 
124                 
125                         //append and remove temp files
126                         for (it = processIDS.begin(); it != processIDS.end(); it++) {
127                                 appendFiles((outputFile + toString(it->second) + ".temp"), outputFile);
128                                 remove((outputFile + toString(it->second) + ".temp").c_str());
129                         }
130                 }
131 #else
132                 driver(distCalculator, seqDB, 0, numSeqs, outputFile + ".temp", cutoff);
133                 appendFiles((outputFile + ".temp"), outputFile);
134                 remove((outputFile + ".temp").c_str());
135 #endif
136                 
137                 delete distCalculator;
138                 
139                 return 0;
140                 
141         }
142         catch(exception& e) {
143                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
144                 exit(1);
145         }
146         catch(...) {
147                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
148                 exit(1);
149         }       
150 }
151 /**************************************************************************************************/
152 void DistanceCommand::createProcesses(string filename) {
153         try {
154                 int process = 0;
155                 processIDS.clear();
156                 
157                 //loop through and create all the processes you want
158                 while (process != processors) {
159                         int pid = fork();
160                         
161                         if (pid > 0) {
162                                 processIDS[lines[process]->end] = pid;  //create map from line number to pid so you can append files in correct order later
163                                 process++;
164                         }else if (pid == 0){
165                                 driver(distCalculator, seqDB, lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff);
166                                 exit(0);
167                         }else { cout << "unable to spawn the necessary processes." << endl; exit(0); }
168                 }
169         
170                 //force parent to wait until all the processes are done
171                 for (it = processIDS.begin(); it != processIDS.end(); it++) { 
172                         int temp = it->second;
173                         wait(&temp);
174                 }
175                 
176         }
177         catch(exception& e) {
178                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }
181         catch(...) {
182                 cout << "An unknown error has occurred in the DistanceCommand class function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183                 exit(1);
184         }       
185 }
186
187 /**************************************************************************************************/
188 /////// need to fix to work with calcs and sequencedb
189 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
190         try {
191
192                 int startTime = time(NULL);
193                 
194                 //column file
195                 ofstream outFile(dFileName.c_str(), ios::trunc);
196                 outFile.setf(ios::fixed, ios::showpoint);
197                 outFile << setprecision(4);
198                 
199                 for(int i=startLine;i<endLine;i++){
200                         
201                         for(int j=0;j<i;j++){
202                                 distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
203                                 double dist = distCalculator->getDist();
204                                 
205                                 if(dist <= cutoff){
206                                         if (isTrue(phylip) != true) { outFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl; }
207                                 }
208                                 if (isTrue(phylip) == true) {  outFile << dist << '\t'; }
209                                 
210                         }
211                         
212                         if (isTrue(phylip) == true) { outFile << endl; }
213                         
214                         if(i % 100 == 0){
215                                 cout << i << '\t' << time(NULL) - startTime << endl;
216                         }
217                         
218                 }
219                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
220                 
221                 //philFile.close();
222                 //distFile.close();
223                 
224                 return 1;
225         }
226         catch(exception& e) {
227                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
228                 exit(1);
229         }
230         catch(...) {
231                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
232                 exit(1);
233         }       
234         
235 }
236
237 /**************************************************************************************************/
238 void DistanceCommand::appendFiles(string temp, string filename) {
239         try{
240                 ofstream output;
241                 ifstream input;
242         
243                 //open output file in append mode
244                 openOutputFileAppend(filename, output);
245                 
246                 //open temp file for reading
247                 openInputFile(temp, input);
248                 
249                 string line;
250                 //read input file and write to output file
251                 while(input.eof() != true) {
252                         getline(input, line); //getline removes the newline char
253                         if (line != "") {
254                                 output << line << endl;   // Appending back newline char 
255                         }
256                 }       
257                 
258                 input.close();
259                 output.close();
260         }
261         catch(exception& e) {
262                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
263                 exit(1);
264         }
265         catch(...) {
266                 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
267                 exit(1);
268         }       
269 }
270 /**************************************************************************************************/