]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
fixed bug with getting sharedrabunds for venn, heatmap and tree.shared commands,...
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 ends = globaldata->getEnds();
24                 convert(globaldata->getProcessors(), processors);
25                 convert(globaldata->getCutOff(), cutoff);
26                 
27                 int i;
28                 if (ends != "T") {
29                         for (i=0; i<globaldata->Estimators.size(); i++) {
30                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
31                                         if (globaldata->Estimators[i] == "nogaps") { 
32                                                 distCalculator = new ignoreGaps();
33                                         }else if (globaldata->Estimators[i] == "eachgap") { 
34                                                 distCalculator = new eachGapDist();     
35                                         }else if (globaldata->Estimators[i] == "onegap") {
36                                                 distCalculator = new oneGapDist();                                      }
37                                 }
38                         }
39                 }else {
40                         for (i=0; i<globaldata->Estimators.size(); i++) {
41                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
42                                         if (globaldata->Estimators[i] == "nogaps") { 
43                                                 distCalculator = new ignoreGaps();      
44                                         }else if (globaldata->Estimators[i] == "eachgap") { 
45                                                 distCalculator = new eachGapIgnoreTermGapDist();
46                                         }else if (globaldata->Estimators[i] == "onegap") { 
47                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
48                                         }
49                                 }
50                         }
51                 }
52                                 
53                 //reset calc for next command
54                 globaldata->setCalc("");
55         }
56         catch(exception& e) {
57                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
58                 exit(1);
59         }
60         catch(...) {
61                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
62                 exit(1);
63         }       
64 }
65 //**********************************************************************************************************************
66
67 int DistanceCommand::execute(){
68         try {
69                 
70                 //read file
71                 string filename = globaldata->inputFileName;
72                 
73                 if(globaldata->getFastaFile() != "") {
74                         readSeqs =  new ReadFasta(filename); }
75                 else if(globaldata->getNexusFile() != "") {
76                         readSeqs = new ReadNexus(filename); }
77                 else if(globaldata->getClustalFile() != "") {
78                         readSeqs = new ReadClustal(filename); }
79                 else if(globaldata->getPhylipFile() != "") {
80                         readSeqs = new ReadPhylip(filename); }
81                         
82                 readSeqs->read();
83                 seqDB = readSeqs->getDB();
84         
85                 int numSeqs = seqDB->getNumSeqs();
86                 
87                 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
88                 
89                 remove(distFile.c_str());
90                 
91                 if(processors == 1){
92                         driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);    
93                 }       
94                 else if(processors == 2){
95                 
96                         int pid = fork();
97                         if(pid > 0){
98                                 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);        
99                                 appendFiles((distFile+"tempa"), distFile);
100                                 remove((distFile + "tempa").c_str());   
101                         }
102                         else{
103                                 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);  
104                                 appendFiles((distFile+"tempb"), distFile);
105                                 remove((distFile + "tempb").c_str());   
106                         }
107                         wait(NULL);
108
109                 }
110                 else if(processors == 3){
111                         int pid1 = fork();
112                         if(pid1 > 0){
113                                 int pid2 = fork();
114                                 if(pid2 > 0){
115                                         driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
116                                         appendFiles(distFile+"tempa", distFile);
117                                         remove((distFile + "tempa").c_str());   
118                                 }
119                                 else{
120                                         driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);        
121                                         appendFiles(distFile+"tempb", distFile);
122                                         remove((distFile + "tempb").c_str());                           
123                                 }
124                                 wait(NULL);
125                         }
126                         else{
127                                 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);      
128                                 appendFiles(distFile+"tempc", distFile);
129                                 remove((distFile + "tempc").c_str());                   
130                         }
131                         wait(NULL);
132                 }
133                 else if(processors == 4){
134                         int pid1 = fork();
135                         if(pid1 > 0){
136                                 int pid2 = fork();
137                                 if(pid2 > 0){
138                                         driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);      
139                                         appendFiles(distFile+"tempa", distFile);
140                                         remove((distFile + "tempa").c_str());                   
141                                 }
142                                 else{
143                                         driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);      
144                                         appendFiles(distFile+"tempb", distFile);
145                                         remove((distFile + "tempb").c_str());                           
146                                 }
147                                 wait(NULL);
148                         }
149                         else{
150                                 int pid3 = fork();
151                                 if(pid3 > 0){
152                                         driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);  
153                                         appendFiles(distFile+"tempc", distFile);
154                                         remove((distFile + "tempc").c_str());                           
155                                 }
156                                 else{
157                                         driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);    
158                                         appendFiles(distFile+"tempd", distFile);
159                                         remove((distFile + "tempd").c_str());                           
160                                 }
161                                 wait(NULL);
162                         }
163                         wait(NULL);
164                 }
165                 wait(NULL);
166         
167                 delete distCalculator;
168         
169                 return 0;
170
171         }
172         catch(exception& e) {
173                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176         catch(...) {
177                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }       
180 }
181
182 /**************************************************************************************************/
183 /////// need to fix to work with calcs and sequencedb
184 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
185         try {
186                 int startTime = time(NULL);
187         
188                 ofstream distFile(dFileName.c_str(), ios::trunc);
189                 distFile.setf(ios::fixed, ios::showpoint);
190                 distFile << setprecision(4);
191         
192                 for(int i=startLine;i<endLine;i++){
193                 
194                         for(int j=0;j<i;j++){   
195                                 distCalculator->calcDist(align->get(i), align->get(j));
196                                 double dist = distCalculator->getDist();
197
198                                 if(dist <= cutoff){
199                                         distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
200                                 }
201                         
202                         }
203                         if(i % 100 == 0){
204                                 cout << i << '\t' << time(NULL) - startTime << endl;
205                         }
206                 
207                 }
208                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
209         
210                 return 1;
211         }
212         catch(exception& e) {
213                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
214                 exit(1);
215         }
216         catch(...) {
217                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
218                 exit(1);
219         }       
220
221 }
222
223 /**************************************************************************************************/
224 void DistanceCommand::appendFiles(string temp, string filename) {
225         try{
226                 ofstream output;
227                 ifstream input;
228                 
229                 //open output file in append mode
230                 openOutputFileAppend(filename, output);
231                 
232                 //open temp file for reading
233                 openInputFile(temp, input);
234                 
235                 string line;
236                 //read input file and write to output file
237                 while(input.eof() != true) {
238                         getline(input, line); //getline removes the newline char
239                         if (line != "") {
240                                 output << line << endl;   // Appending back newline char 
241                         }
242                 }       
243                         
244                 input.close();
245                 output.close();
246         }
247         catch(exception& e) {
248                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249                 exit(1);
250         }
251         catch(...) {
252                 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
253                 exit(1);
254         }       
255 }
256 /**************************************************************************************************/