]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
added distance command and filterseqs
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 ends = globaldata->getEnds();
24                 convert(globaldata->getProcessors(), processors);
25                 convert(globaldata->getCutOff(), cutoff);
26                 
27                 int i;
28                 if (ends != "T") {
29                         for (i=0; i<globaldata->Estimators.size(); i++) {
30                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
31                                         if (globaldata->Estimators[i] == "nogaps") { 
32                                                 distCalculator = new ignoreGaps();
33                                         }else if (globaldata->Estimators[i] == "eachgap") { 
34                                                 distCalculator = new eachGapDist();     
35                                         }else if (globaldata->Estimators[i] == "onegap") {
36                                                 distCalculator = new oneGapDist();                                      }
37                                 }
38                         }
39                 }else {
40                         for (i=0; i<globaldata->Estimators.size(); i++) {
41                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
42                                         if (globaldata->Estimators[i] == "nogaps") { 
43                                                 distCalculator = new ignoreGaps();      
44                                         }else if (globaldata->Estimators[i] == "eachgap") { 
45                                                 distCalculator = new eachGapIgnoreTermGapDist();
46                                         }else if (globaldata->Estimators[i] == "onegap") { 
47                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
48                                         }
49                                 }
50                         }
51                 }
52                                 
53                 //reset calc for next command
54                 globaldata->setCalc("");
55
56                 
57         }
58         catch(exception& e) {
59                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
60                 exit(1);
61         }
62         catch(...) {
63                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
64                 exit(1);
65         }       
66 }
67 //**********************************************************************************************************************
68
69 int DistanceCommand::execute(){
70         try {
71                 
72                 //read file
73                 string filename = globaldata->inputFileName;
74                 
75                 if(globaldata->getFastaFile() != "") {
76                         readSeqs =  new ReadFasta(filename); }
77                 else if(globaldata->getNexusFile() != "") {
78                         readSeqs = new ReadNexus(filename); }
79                 else if(globaldata->getClustalFile() != "") {
80                         readSeqs = new ReadClustal(filename); }
81                 else if(globaldata->getPhylipFile() != "") {
82                         readSeqs = new ReadPhylip(filename); }
83                         
84                 readSeqs->read();
85                 seqDB = readSeqs->getDB();
86         
87                 int numSeqs = seqDB->getNumSeqs();
88                 
89                 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
90                 
91                 remove(distFile.c_str());
92                 
93                 if(processors == 1){
94                         driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);    
95                 }       
96                 else if(processors == 2){
97                 
98                         int pid = fork();
99                         if(pid > 0){
100                                 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);        
101                                 appendFiles((distFile+"tempa"), distFile);
102                                 remove((distFile + "tempa").c_str());   
103                         }
104                         else{
105                                 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);  
106                                 appendFiles((distFile+"tempb"), distFile);
107                                 remove((distFile + "tempb").c_str());   
108                         }
109                         wait(NULL);
110
111                 }
112                 else if(processors == 3){
113                         int pid1 = fork();
114                         if(pid1 > 0){
115                                 int pid2 = fork();
116                                 if(pid2 > 0){
117                                         driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
118                                         appendFiles(distFile+"tempa", distFile);
119                                         remove((distFile + "tempa").c_str());   
120                                 }
121                                 else{
122                                         driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);        
123                                         appendFiles(distFile+"tempb", distFile);
124                                         remove((distFile + "tempb").c_str());                           
125                                 }
126                                 wait(NULL);
127                         }
128                         else{
129                                 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);      
130                                 appendFiles(distFile+"tempc", distFile);
131                                 remove((distFile + "tempc").c_str());                   
132                         }
133                         wait(NULL);
134                 }
135                 else if(processors == 4){
136                         int pid1 = fork();
137                         if(pid1 > 0){
138                                 int pid2 = fork();
139                                 if(pid2 > 0){
140                                         driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);      
141                                         //system(("cat " + distFile + "tempa" + " >> " + distFile).c_str());
142                                         appendFiles(distFile+"tempa", distFile);
143                                         //system(("rm " + distFile + "tempa").c_str()); 
144                                         remove((distFile + "tempa").c_str());                   
145                                 }
146                                 else{
147                                         driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);      
148                                         //system(("cat " + distFile + "tempb" + " >> " + distFile).c_str());
149                                         appendFiles(distFile+"tempb", distFile);
150                                         //system(("rm " + distFile + "tempb").c_str());
151                                         remove((distFile + "tempb").c_str());                           
152                                 }
153                                 wait(NULL);
154                         }
155                         else{
156                                 int pid3 = fork();
157                                 if(pid3 > 0){
158                                         driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);  
159                                         //system(("cat " + distFile + "tempc" + " >> " + distFile).c_str());
160                                         appendFiles(distFile+"tempc", distFile);
161                                         //system(("rm " + distFile + "tempc").c_str());
162                                         remove((distFile + "tempc").c_str());                           
163                                 }
164                                 else{
165                                         driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);    
166                                         //system(("cat " + distFile + "tempd" + " >> " + distFile).c_str());
167                                         appendFiles(distFile+"tempd", distFile);
168                                         //system(("rm " + distFile + "tempd").c_str());
169                                         remove((distFile + "tempd").c_str());                           
170                                 }
171                                 wait(NULL);
172                         }
173                         wait(NULL);
174                 }
175                 wait(NULL);
176         
177                 delete distCalculator;
178         
179                 return 0;
180
181         }
182         catch(exception& e) {
183                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
184                 exit(1);
185         }
186         catch(...) {
187                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
188                 exit(1);
189         }       
190 }
191
192 /**************************************************************************************************/
193 /////// need to fix to work with calcs and sequencedb
194 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
195         try {
196                 int startTime = time(NULL);
197         
198                 ofstream distFile(dFileName.c_str(), ios::trunc);
199                 distFile.setf(ios::fixed, ios::showpoint);
200                 distFile << setprecision(4);
201         
202                 for(int i=startLine;i<endLine;i++){
203                 
204                         for(int j=0;j<i;j++){   
205                                 distCalculator->calcDist(align->get(i), align->get(j));
206                                 double dist = distCalculator->getDist();
207
208                                 if(dist <= cutoff){
209                                         distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
210 //cout << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
211                                 }
212                         
213                         }
214                         if(i % 100 == 0){
215                                 cout << i << '\t' << time(NULL) - startTime << endl;
216                         }
217                 
218                 }
219                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
220         
221                 return 1;
222         }
223         catch(exception& e) {
224                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
225                 exit(1);
226         }
227         catch(...) {
228                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
229                 exit(1);
230         }       
231
232 }
233
234 /**************************************************************************************************/
235 void DistanceCommand::appendFiles(string temp, string filename) {
236         try{
237                 ofstream output;
238                 ifstream input;
239                 
240                 //open output file in append mode
241                 openOutputFileAppend(filename, output);
242                 
243                 //open temp file for reading
244                 openInputFile(temp, input);
245                 
246                 string line;
247                 //read input file and write to output file
248                 while(input.eof() != true) {
249                         getline(input, line); //getline removes the newline char
250                         if (line != "") {
251                                 output << line << endl;   // Appending back newline char 
252                         }
253                 }       
254                         
255                 input.close();
256                 output.close();
257         }
258         catch(exception& e) {
259                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
260                 exit(1);
261         }
262         catch(...) {
263                 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
264                 exit(1);
265         }       
266 }