5 * Created by Sarah Westcott on 5/7/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
17 //**********************************************************************************************************************
19 DistanceCommand::DistanceCommand(){
21 globaldata = GlobalData::getInstance();
22 validCalculator = new ValidCalculators();
23 countends = globaldata->getCountEnds();
24 convert(globaldata->getProcessors(), processors);
25 convert(globaldata->getCutOff(), cutoff);
28 string filename = globaldata->getFastaFile();
29 openInputFile(filename, in);
33 if (isTrue(countends) == true) {
34 for (i=0; i<globaldata->Estimators.size(); i++) {
35 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
36 if (globaldata->Estimators[i] == "nogaps") {
37 distCalculator = new ignoreGaps();
38 }else if (globaldata->Estimators[i] == "eachgap") {
39 distCalculator = new eachGapDist();
40 }else if (globaldata->Estimators[i] == "onegap") {
41 distCalculator = new oneGapDist(); }
45 for (i=0; i<globaldata->Estimators.size(); i++) {
46 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
47 if (globaldata->Estimators[i] == "nogaps") {
48 distCalculator = new ignoreGaps();
49 }else if (globaldata->Estimators[i] == "eachgap") {
50 distCalculator = new eachGapIgnoreTermGapDist();
51 }else if (globaldata->Estimators[i] == "onegap") {
52 distCalculator = new oneGapIgnoreTermGapDist();
58 //reset calc for next command
59 globaldata->setCalc("");
62 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
66 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70 //**********************************************************************************************************************
72 int DistanceCommand::execute(){
75 //reads fasta file and fills sequenceDB
76 if(globaldata->getFastaFile() != "") { seqDB = new SequenceDB(in); }
77 else { cout << "Error no fasta file." << endl; return 0; }
79 int numSeqs = seqDB->getNumSeqs();
82 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
83 string phylipFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
85 remove(phylipFile.c_str());
86 remove(distFile.c_str());
88 //output numSeqs to phylip formatted dist file
89 openOutputFile(phylipFile, phylipOut);
90 phylipOut << numSeqs << endl;
93 //# if defined (_WIN32)
94 //figure out how to implement the fork and wait commands in windows
95 // driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
98 #if defined (__APPLE__) || (__MACH__)
100 driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
101 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
102 remove((phylipFile + "tempPhylipA").c_str());
104 else if(processors == 2){
108 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
109 appendFiles((distFile+"tempa"), distFile);
110 remove((distFile + "tempa").c_str());
111 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
112 remove((phylipFile + "tempPhylipA").c_str());
115 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
116 appendFiles((distFile+"tempb"), distFile);
117 remove((distFile + "tempb").c_str());
118 appendFiles((phylipFile + "tempPhylipB"), phylipFile);
119 remove((phylipFile + "tempPhylipB").c_str());
124 else if(processors == 3){
129 driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
130 appendFiles(distFile+"tempa", distFile);
131 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
132 remove((distFile + "tempa").c_str());
133 remove((phylipFile + "tempPhylipA").c_str());
136 driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
137 appendFiles(distFile+"tempb", distFile);
138 appendFiles((phylipFile + "tempPhylipB"), phylipFile);
139 remove((distFile + "tempb").c_str());
140 remove((phylipFile + "tempPhylipB").c_str());
145 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
146 appendFiles(distFile+"tempc", distFile);
147 appendFiles((phylipFile + "tempPhylipC"), phylipFile);
148 remove((distFile + "tempc").c_str());
149 remove((phylipFile + "tempPhylipC").c_str());
153 else if(processors == 4){
158 driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
159 appendFiles(distFile+"tempa", distFile);
160 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
161 remove((distFile + "tempa").c_str());
162 remove((phylipFile + "tempPhylipA").c_str());
165 driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
166 appendFiles(distFile+"tempb", distFile);
167 appendFiles((phylipFile + "tempPhylipB"), phylipFile);
168 remove((distFile + "tempb").c_str());
169 remove((phylipFile + "tempPhylipB").c_str());
176 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
177 appendFiles(distFile+"tempc", distFile);
178 appendFiles((phylipFile + "tempPhylipC"), phylipFile);
179 remove((distFile + "tempc").c_str());
180 remove((phylipFile + "tempPhylipC").c_str());
184 driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", phylipFile + "tempPhylipD", cutoff);
185 appendFiles(distFile+"tempd", distFile);
186 appendFiles((phylipFile + "tempPhylipD"), phylipFile);
187 remove((distFile + "tempd").c_str());
188 remove((phylipFile + "tempPhylipD").c_str());
195 #elif (linux) || (__linux)
197 driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
198 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
199 remove((phylipFile + "tempPhylipA").c_str());
201 else if(processors == 2){
205 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
206 appendFiles((distFile+"tempa"), distFile);
207 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
208 remove((distFile + "tempa").c_str());
209 remove((phylipFile + "tempPhylipA").c_str());
213 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
214 appendFiles((distFile+"tempb"), distFile);
215 appendFiles((phylipFile + "tempPhylipB"), phylipFile);
216 remove((distFile + "tempb").c_str());
217 remove((phylipFile + "tempPhylipB").c_str());
222 else if(processors == 3){
227 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
228 appendFiles((distFile+"tempa"), distFile);
229 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
230 remove((distFile + "tempa").c_str());
231 remove((phylipFile + "tempPhylipA").c_str());
235 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
236 appendFiles((distFile+"tempb"), distFile);
237 appendFiles((phylipFile + "tempPhylipB"), phylipFile);
238 remove((distFile + "tempb").c_str());
239 remove((phylipFile + "tempPhylipB").c_str());
244 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
245 appendFiles(distFile+"tempc", distFile);
246 appendFiles((phylipFile + "tempPhylipC"), phylipFile);
247 remove((distFile + "tempc").c_str());
248 remove((phylipFile + "tempPhylipC").c_str());
252 else if(processors == 4){
257 driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
258 appendFiles((distFile+"tempa"), distFile);
259 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
260 remove((distFile + "tempa").c_str());
261 remove((phylipFile + "tempPhylipA").c_str());
264 driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
265 appendFiles((distFile+"tempb"), distFile);
266 appendFiles((phylipFile + "tempPhylipB"), phylipFile);
267 remove((distFile + "tempb").c_str());
268 remove((phylipFile + "tempPhylipB").c_str());
275 driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
276 appendFiles(distFile+"tempc", distFile);
277 appendFiles((phylipFile + "tempPhylipC"), phylipFile);
278 remove((distFile + "tempc").c_str());
279 remove((phylipFile + "tempPhylipC").c_str());
282 driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", phylipFile + "tempPhylipD", cutoff);
283 appendFiles(distFile+"tempd", distFile);
284 appendFiles((phylipFile + "tempPhylipD"), phylipFile);
285 remove((distFile + "tempd").c_str());
286 remove((phylipFile + "tempPhylipD").c_str());
295 driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
296 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
297 remove((phylipFile + "tempPhylipA").c_str());
300 delete distCalculator;
305 catch(exception& e) {
306 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
310 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
315 /**************************************************************************************************/
316 /////// need to fix to work with calcs and sequencedb
317 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, string pFilename, float cutoff){
319 int startTime = time(NULL);
322 ofstream distFile(dFileName.c_str(), ios::trunc);
323 distFile.setf(ios::fixed, ios::showpoint);
324 distFile << setprecision(4);
327 ofstream philFile(pFilename.c_str(), ios::trunc);
328 philFile.setf(ios::fixed, ios::showpoint);
329 philFile << setprecision(4);
331 for(int i=startLine;i<endLine;i++){
333 for(int j=0;j<i;j++){
334 distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
335 double dist = distCalculator->getDist();
338 distFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl;
340 philFile << dist << '\t';
346 cout << i << '\t' << time(NULL) - startTime << endl;
350 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
357 catch(exception& e) {
358 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
362 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
368 /**************************************************************************************************/
369 void DistanceCommand::appendFiles(string temp, string filename) {
374 //open output file in append mode
375 openOutputFileAppend(filename, output);
377 //open temp file for reading
378 openInputFile(temp, input);
381 //read input file and write to output file
382 while(input.eof() != true) {
383 getline(input, line); //getline removes the newline char
385 output << line << endl; // Appending back newline char
392 catch(exception& e) {
393 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
397 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
401 /**************************************************************************************************/