]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
altered venn command to make use of sharedchao for any number of groups, fixed window...
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 ends = globaldata->getEnds();
24                 seqDB = globaldata->gSequenceDB;
25                 convert(globaldata->getProcessors(), processors);
26                 convert(globaldata->getCutOff(), cutoff);
27                 distFile = getRootName(globaldata->getFastaFile()) + "dist";
28                 
29                 int i;
30                 if (ends != "T") {
31                         for (i=0; i<globaldata->Estimators.size(); i++) {
32                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
33                                         if (globaldata->Estimators[i] == "nogaps") { 
34                                                 distCalculator = new ignoreGaps();
35                                         }else if (globaldata->Estimators[i] == "eachgap") { 
36                                                 distCalculator = new eachGapDist();     
37                                         }else if (globaldata->Estimators[i] == "onegap") {
38                                                 distCalculator = new oneGapDist();                                      }
39                                 }
40                         }
41                 }else {
42                         for (i=0; i<globaldata->Estimators.size(); i++) {
43                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
44                                         if (globaldata->Estimators[i] == "nogaps") { 
45                                                 distCalculator = new ignoreGaps();      
46                                         }else if (globaldata->Estimators[i] == "eachgap") { 
47                                                 distCalculator = new eachGapIgnoreTermGapDist();
48                                         }else if (globaldata->Estimators[i] == "onegap") { 
49                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
50                                         }
51                                 }
52                         }
53                 }
54                                 
55                                 
56                 //reset calc for next command
57                 globaldata->setCalc("");
58
59                 
60         }
61         catch(exception& e) {
62                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
63                 exit(1);
64         }
65         catch(...) {
66                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
67                 exit(1);
68         }       
69 }
70 //**********************************************************************************************************************
71
72 int DistanceCommand::execute(){
73         try {
74                 int numSeqs = seqDB->getNumSeqs();
75                 
76                 system(("rm "+distFile).c_str() );
77                 if(processors == 1){
78                         driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);    
79                 }       
80                 else if(processors == 2){
81                 
82                         int pid = fork();
83                         if(pid > 0){
84                                 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);        
85 //                              system(("cat " + distFile + "tempa" + " >> " + distFile).c_str());
86 //                              system(("rm " + distFile + "tempa").c_str());                           
87                         }
88                         else{
89                                 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);  
90 //                              system(("cat " + distFile + "tempb" + " >> " + distFile).c_str());
91 //                              system(("rm " + distFile + "tempb").c_str());                           
92                         }
93                         wait(NULL);
94
95                 }
96                 else if(processors == 3){
97                         int pid1 = fork();
98                         if(pid1 > 0){
99                                 int pid2 = fork();
100                                 if(pid2 > 0){
101                                         driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
102                                         #ifdef HAVE_CAT
103                                                 system(("cat " + distFile + "tempa" + " >> " + distFile).c_str());
104                                         #else
105                                                 #ifdef HAVE_COPY
106 //get system call from pat system(("copy " + distFile + "tempa").c_str());
107                                                 #else
108                                                         cout << "Sorry but I can't continue because this operating system doesn't appear to support the cat() or copy() system calls." << endl;
109                                                 #endif
110                                         #endif
111                                         
112                                         #ifdef HAVE_RM
113                                                 system(("rm " + distFile + "tempa").c_str());   
114                                         #else
115                                                 #ifdef HAVE_ERASE
116                                                         system(("erase " + distFile + "tempa").c_str());
117                                                 #else
118                                                         cout << "Sorry but I can't remove the required files because this operating system doesn't appear to support the rm() or erase() system calls." << endl;
119                                                 #endif
120                                         #endif
121                                 }
122                                 else{
123                                         driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);        
124                                         system(("cat " + distFile + "tempb" + " >> " + distFile).c_str());
125                                         system(("rm " + distFile + "tempb").c_str());                           
126                                 }
127                                 wait(NULL);
128                         }
129                         else{
130                                 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);      
131                                 system(("cat " + distFile + "tempc" + " >> " + distFile).c_str());
132                                 system(("rm " + distFile + "tempc").c_str());                           
133                         }
134                         wait(NULL);
135                 }
136                 else if(processors == 4){
137                         int pid1 = fork();
138                         if(pid1 > 0){
139                                 int pid2 = fork();
140                                 if(pid2 > 0){
141                                         driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);      
142                                         system(("cat " + distFile + "tempa" + " >> " + distFile).c_str());
143                                         system(("rm " + distFile + "tempa").c_str());                           
144                                 }
145                                 else{
146                                         driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);      
147                                         system(("cat " + distFile + "tempb" + " >> " + distFile).c_str());
148                                         system(("rm " + distFile + "tempb").c_str());                           
149                                 }
150                                 wait(NULL);
151                         }
152                         else{
153                                 int pid3 = fork();
154                                 if(pid3 > 0){
155                                         driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);  
156                                         system(("cat " + distFile + "tempc" + " >> " + distFile).c_str());
157                                         system(("rm " + distFile + "tempc").c_str());                           
158                                 }
159                                 else{
160                                         driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);    
161                                         system(("cat " + distFile + "tempd" + " >> " + distFile).c_str());
162                                         system(("rm " + distFile + "tempd").c_str());                           
163                                 }
164                                 wait(NULL);
165                         }
166                         wait(NULL);
167                 }
168                 wait(NULL);
169         
170                 delete distCalculator;
171         
172                 return 0;
173
174         }
175         catch(exception& e) {
176                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
177                 exit(1);
178         }
179         catch(...) {
180                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
181                 exit(1);
182         }       
183 }
184
185 /**************************************************************************************************/
186 /////// need to fix to work with calcs and sequencedb
187 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
188         try {
189                 int startTime = time(NULL);
190         
191                 ofstream distFile(dFileName.c_str(), ios::trunc);
192                 distFile.setf(ios::fixed, ios::showpoint);
193                 distFile << setprecision(4);
194         
195                 for(int i=startLine;i<endLine;i++){
196                 
197                         for(int j=0;j<i;j++){
198                         
199                                 distCalculator->calcDist(align->get(i), align->get(j));
200                                 double dist = distCalculator->getDist();
201                                 if(dist <= cutoff){
202                                 distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
203                                 }
204                         
205                         }
206                         if(i % 100 == 0){
207                                 cout << i << '\t' << time(NULL) - startTime << endl;
208                         }
209                 
210                 }
211                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
212         
213                 return 1;
214         }
215         catch(exception& e) {
216                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
217                 exit(1);
218         }
219         catch(...) {
220                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
221                 exit(1);
222         }       
223
224 }
225
226 /**************************************************************************************************/
227