]> git.donarmstrong.com Git - mothur.git/blob - distancecommand.cpp
added alignment code
[mothur.git] / distancecommand.cpp
1 /*
2  *  distancecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 5/7/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
16
17 //**********************************************************************************************************************
18
19 DistanceCommand::DistanceCommand(){
20         try {
21                 globaldata = GlobalData::getInstance();
22                 validCalculator = new ValidCalculators();
23                 ends = globaldata->getEnds();
24                 convert(globaldata->getProcessors(), processors);
25                 convert(globaldata->getCutOff(), cutoff);
26                 
27                 int i;
28                 if (ends != "T") {
29                         for (i=0; i<globaldata->Estimators.size(); i++) {
30                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
31                                         if (globaldata->Estimators[i] == "nogaps") { 
32                                                 distCalculator = new ignoreGaps();
33                                         }else if (globaldata->Estimators[i] == "eachgap") { 
34                                                 distCalculator = new eachGapDist();     
35                                         }else if (globaldata->Estimators[i] == "onegap") {
36                                                 distCalculator = new oneGapDist();                                      }
37                                 }
38                         }
39                 }else {
40                         for (i=0; i<globaldata->Estimators.size(); i++) {
41                                 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) { 
42                                         if (globaldata->Estimators[i] == "nogaps") { 
43                                                 distCalculator = new ignoreGaps();      
44                                         }else if (globaldata->Estimators[i] == "eachgap") { 
45                                                 distCalculator = new eachGapIgnoreTermGapDist();
46                                         }else if (globaldata->Estimators[i] == "onegap") { 
47                                                 distCalculator = new oneGapIgnoreTermGapDist(); 
48                                         }
49                                 }
50                         }
51                 }
52                                 
53                 //reset calc for next command
54                 globaldata->setCalc("");
55         }
56         catch(exception& e) {
57                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
58                 exit(1);
59         }
60         catch(...) {
61                 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
62                 exit(1);
63         }       
64 }
65 //**********************************************************************************************************************
66
67 int DistanceCommand::execute(){
68         try {
69                 
70                 //read file
71                 string filename = globaldata->inputFileName;
72                 
73                 if(globaldata->getFastaFile() != "") {
74                         readSeqs =  new ReadFasta(filename); }
75                 else if(globaldata->getNexusFile() != "") {
76                         readSeqs = new ReadNexus(filename); }
77                 else if(globaldata->getClustalFile() != "") {
78                         readSeqs = new ReadClustal(filename); }
79                 else if(globaldata->getPhylipFile() != "") {
80                         readSeqs = new ReadPhylip(filename); }
81                         
82                 readSeqs->read();
83                 seqDB = readSeqs->getDB();
84         
85                 int numSeqs = seqDB->getNumSeqs();
86                 
87                 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
88                 
89                 remove(distFile.c_str());
90                 
91                 //#     if defined (_WIN32)
92                         //figure out how to implement the fork and wait commands in windows
93                 //      driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
94                 //#     endif
95                 
96                 #       if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
97                         if(processors == 1){
98                                 driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);    
99                         }       
100                         else if(processors == 2){
101                 
102                                 int pid = fork();
103                                 if(pid > 0){
104                                         driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);        
105                                         appendFiles((distFile+"tempa"), distFile);
106                                         remove((distFile + "tempa").c_str());   
107                                 }
108                                 else{
109                                         driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);  
110                                         appendFiles((distFile+"tempb"), distFile);
111                                         remove((distFile + "tempb").c_str());   
112                                 }
113                                 wait(NULL);
114
115                         }
116                         else if(processors == 3){
117                                 int pid1 = fork();
118                                 if(pid1 > 0){
119                                         int pid2 = fork();
120                                         if(pid2 > 0){
121                                                 driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
122                                                 appendFiles(distFile+"tempa", distFile);
123                                                 remove((distFile + "tempa").c_str());   
124                                         }
125                                         else{
126                                                 driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);        
127                                                 appendFiles(distFile+"tempb", distFile);
128                                                 remove((distFile + "tempb").c_str());                           
129                                         }
130                                         wait(NULL);
131                                 }
132                                 else{
133                                         driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);      
134                                         appendFiles(distFile+"tempc", distFile);
135                                         remove((distFile + "tempc").c_str());                   
136                                 }
137                                 wait(NULL);
138                         }
139                         else if(processors == 4){
140                                 int pid1 = fork();
141                                 if(pid1 > 0){
142                                         int pid2 = fork();
143                                         if(pid2 > 0){
144                                                 driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);      
145                                                 appendFiles(distFile+"tempa", distFile);
146                                                 remove((distFile + "tempa").c_str());                   
147                                         }
148                                         else{
149                                                 driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);      
150                                                 appendFiles(distFile+"tempb", distFile);
151                                                 remove((distFile + "tempb").c_str());                           
152                                         }
153                                         wait(NULL);
154                                 }
155                                 else{
156                                         int pid3 = fork();
157                                         if(pid3 > 0){
158                                                 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);  
159                                                 appendFiles(distFile+"tempc", distFile);
160                                                 remove((distFile + "tempc").c_str());                           
161                                         }
162                                         else{
163                                                 driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);    
164                                                 appendFiles(distFile+"tempd", distFile);
165                                                 remove((distFile + "tempd").c_str());                           
166                                         }
167                                         wait(NULL);
168                                 }
169                                 wait(NULL);
170                         }
171                         wait(NULL);
172                 #       else
173                         driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
174                 #       endif
175         
176                 delete distCalculator;
177         
178                 return 0;
179
180         }
181         catch(exception& e) {
182                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183                 exit(1);
184         }
185         catch(...) {
186                 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
187                 exit(1);
188         }       
189 }
190
191 /**************************************************************************************************/
192 /////// need to fix to work with calcs and sequencedb
193 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
194         try {
195                 int startTime = time(NULL);
196         
197                 ofstream distFile(dFileName.c_str(), ios::trunc);
198                 distFile.setf(ios::fixed, ios::showpoint);
199                 distFile << setprecision(4);
200         
201                 for(int i=startLine;i<endLine;i++){
202                 
203                         for(int j=0;j<i;j++){
204 //cout << "unaligned" << endl;
205 //cout << align->get(i).getUnaligned() << "  " << align->get(j).getUnaligned() << endl;
206 //cout << "aligned" << endl;
207 //cout << align->get(i).getAligned() << "  " << align->get(j).getAligned() << endl;
208                                 distCalculator->calcDist(align->get(i), align->get(j));
209                                 double dist = distCalculator->getDist();
210
211                                 if(dist <= cutoff){
212                                         distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
213                                 }
214                         
215                         }
216                         if(i % 100 == 0){
217                                 cout << i << '\t' << time(NULL) - startTime << endl;
218                         }
219                 
220                 }
221                 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
222         
223                 return 1;
224         }
225         catch(exception& e) {
226                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
227                 exit(1);
228         }
229         catch(...) {
230                 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231                 exit(1);
232         }       
233
234 }
235
236 /**************************************************************************************************/
237 void DistanceCommand::appendFiles(string temp, string filename) {
238         try{
239                 ofstream output;
240                 ifstream input;
241                 
242                 //open output file in append mode
243                 openOutputFileAppend(filename, output);
244                 
245                 //open temp file for reading
246                 openInputFile(temp, input);
247                 
248                 string line;
249                 //read input file and write to output file
250                 while(input.eof() != true) {
251                         getline(input, line); //getline removes the newline char
252                         if (line != "") {
253                                 output << line << endl;   // Appending back newline char 
254                         }
255                 }       
256                         
257                 input.close();
258                 output.close();
259         }
260         catch(exception& e) {
261                 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
262                 exit(1);
263         }
264         catch(...) {
265                 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
266                 exit(1);
267         }       
268 }
269 /**************************************************************************************************/