5 * Created by Sarah Westcott on 5/7/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
17 //**********************************************************************************************************************
// Constructor: reads the run settings from the GlobalData singleton, opens the fasta input
// and picks the distance calculator.
//   - countends, phylip: stored as returned by globaldata (later tested with isTrue()).
//   - processors, cutoff: converted from globaldata's values via convert().
//   - the file named by getFastaFile() is opened on `in` with openInputFile().
// Calculator chosen for each estimator accepted by isValidCalculator("distance", ...):
//   first loop (guarded by isTrue(countends)):
//       "nogaps" -> ignoreGaps, "eachgap" -> eachGapDist, "onegap" -> oneGapDist
//   second loop (presumably the else branch; the `else` line is not visible in this view):
//       "nogaps" -> ignoreGaps, "eachgap" -> eachGapIgnoreTermGapDist,
//       "onegap" -> oneGapIgnoreTermGapDist
// NOTE(review): every matching estimator assigns a fresh `new` object to distCalculator and no
// delete is visible in the loops, so with several valid estimators only the last one is kept
// and the earlier ones look leaked -- confirm.
// NOTE(review): if no estimator matches, distCalculator is not assigned in the visible code.
19 DistanceCommand::DistanceCommand(){
21 globaldata = GlobalData::getInstance();
22 validCalculator = new ValidCalculators();
23 countends = globaldata->getCountEnds();
24 convert(globaldata->getProcessors(), processors);
25 convert(globaldata->getCutOff(), cutoff);
26 phylip = globaldata->getPhylipFile();
// open the fasta file; execute() later hands `in` to SequenceDB
29 string filename = globaldata->getFastaFile();
30 openInputFile(filename, in);
// terminal gaps are counted: use the plain gap-handling calculators
35 if (isTrue(countends) == true) {
36 for (i=0; i<globaldata->Estimators.size(); i++) {
37 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
38 if (globaldata->Estimators[i] == "nogaps") {
39 distCalculator = new ignoreGaps();
40 }else if (globaldata->Estimators[i] == "eachgap") {
41 distCalculator = new eachGapDist();
42 }else if (globaldata->Estimators[i] == "onegap") {
43 distCalculator = new oneGapDist(); }
// same selection, but with the "IgnoreTermGap" variants for eachgap / onegap
47 for (i=0; i<globaldata->Estimators.size(); i++) {
48 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
49 if (globaldata->Estimators[i] == "nogaps") {
50 distCalculator = new ignoreGaps();
51 }else if (globaldata->Estimators[i] == "eachgap") {
52 distCalculator = new eachGapIgnoreTermGapDist();
53 }else if (globaldata->Estimators[i] == "onegap") {
54 distCalculator = new oneGapIgnoreTermGapDist();
60 //reset calc for next command
61 globaldata->setCalc("");
// error reports (presumably inside catch handlers; the catch lines are not visible here)
64 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
68 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72 //**********************************************************************************************************************
// Runs the distance command: loads the sequences from the fasta stream, computes pairwise
// distances with driver() and writes them to "<root>dist" (column format) and, when the
// phylip option is true, also to "<root>phylip.dist".
// Returns 0 when no fasta file is set (other return statements are not visible in this view).
//
// Output assembly as visible here:
//   - the phylip file first receives the number of sequences as its header line;
//   - single-pass runs let driver() write the phylip rows to "<phylipFile>tempPhylipA",
//     which is then appended to the phylip file and removed;
//   - multi-process runs write per-process "<file><pid>.temp" files that are appended in
//     processIDS iteration order and then removed.
74 int DistanceCommand::execute(){
77 //reads fasta file and fills sequenceDB
78 if(globaldata->getFastaFile() != "") { seqDB = new SequenceDB(in); }
79 else { cout << "Error no fasta file." << endl; return 0; }
81 int numSeqs = seqDB->getNumSeqs();
84 string phylipFile = "";
85 string distFile = getRootName(globaldata->getFastaFile()) + "dist";
// start from a clean column file: results are appended to it later
86 remove(distFile.c_str());
88 //does the user want the phylip formatted file as well
89 if (isTrue(phylip) == true) {
90 phylipFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
91 remove(phylipFile.c_str());
93 //output numSeqs to phylip formatted dist file
94 openOutputFile(phylipFile, phylipOut);
95 phylipOut << numSeqs << endl;
100 //# if defined (_WIN32)
101 //figure out how to implement the fork and wait commands in windows
102 // driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
// process-based parallelism is only attempted on Apple / Linux builds
106 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107 //if you don't need to fork anything
109 driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
111 if (isTrue(phylip) == true) {
112 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
113 remove((phylipFile + "tempPhylipA").c_str());
115 }else{ //you have multiple processors
// Row i needs i comparisons (driver's inner loop runs j < i), so the work done up to row r
// grows with r*r; cutting at sqrt(k/processors)*numSeqs gives each process a similar share.
117 for (int i = 0; i < processors; i++) {
118 lines.push_back(new linePair());
119 lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
120 lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
// NOTE(review): prints the first two row ranges unconditionally; looks like leftover debug output
123 cout << lines[0]->start << '\t' << lines[0]->end << endl;
124 cout << lines[1]->start << '\t' << lines[1]->end << endl;
126 createProcesses(distFile, phylipFile);
128 //append and remove temp files
129 for (it = processIDS.begin(); it != processIDS.end(); it++) {
130 appendFiles((distFile + toString(it->second) + ".temp"), distFile);
131 remove((distFile + toString(it->second) + ".temp").c_str());
133 if (isTrue(phylip) == true) {
134 appendFiles((phylipFile + toString(it->second) + ".temp"), phylipFile);
135 remove((phylipFile + toString(it->second) + ".temp").c_str());
// fallback for other platforms (presumably the #else branch; the directive is not visible here)
140 driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
// compare, do not assign: this test previously used `=` instead of `==`
142 if (isTrue(phylip) == true) {
143 appendFiles((phylipFile + "tempPhylipA"), phylipFile);
144 remove((phylipFile + "tempPhylipA").c_str());
// NOTE(review): only distCalculator is released here; no delete of seqDB or of the linePair
// objects pushed into `lines` is visible in this function -- confirm they are freed elsewhere.
148 delete distCalculator;
153 catch(exception& e) {
154 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
158 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
162 /**************************************************************************************************/
// Runs driver() once per entry in `lines`, one process per entry, and records the process ids.
//   column - base name of the column-format output; each worker writes "<column><pid>.temp"
//   phylip - base name of the phylip-format output; each worker writes "<phylip><pid>.temp"
// NOTE: the `phylip` parameter shadows the member of the same name inside this function.
// The lines that create the processes and wait for them are not visible in this view
// (presumably fork() and a wait call -- confirm).
163 void DistanceCommand::createProcesses(string column, string phylip) {
168 //loop through and create all the processes you want
169 while (process != processors) {
// keyed by the range's end row, so iterating processIDS visits the workers in row order
173 processIDS[lines[process]->end] = pid; //create map from line number to pid so you can append files in correct order later
// the worker computes rows [start, end) into temp files named after its own pid
176 driver(distCalculator, seqDB, lines[process]->start, lines[process]->end, column + toString(getpid()) + ".temp", phylip + toString(getpid()) + ".temp", cutoff);
// NOTE(review): the failure path exits with status 0, which callers cannot tell from success
178 }else { cout << "unable to spawn the necessary processes." << endl; exit(0); }
181 //force parent to wait until all the processes are done
182 for (it = processIDS.begin(); it != processIDS.end(); it++) {
183 int temp = it->second;
188 catch(exception& e) {
189 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
193 cout << "An unknown error has occurred in the DistanceCommand class function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
198 /**************************************************************************************************/
199 /////// need to fix to work with calcs and sequencedb
// Computes the distances for rows startLine .. endLine-1 of the lower triangle: each sequence i
// is compared with every earlier sequence j < i.
//   distCalculator - calculator used through calcDist() / getDist()
//   align          - sequence database; sequences are fetched with get(index)
//   startLine      - first row to process (inclusive)
//   endLine        - row to stop at (exclusive)
//   dFileName      - column-format output, truncated on open; rows are "nameI nameJ dist"
//   pFilename      - phylip-format output, truncated on open; tab-separated distances, one row
//                    per sequence; the file is removed again at the end unless phylip is true
//   cutoff         - not referenced in the visible lines; presumably filters which distances
//                    are written to the column file -- TODO confirm
// `phylip` here is not a parameter; it refers to a name declared outside this function.
// The return statement is not visible in this view.
200 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, string pFilename, float cutoff){
// wall-clock start, used for the progress lines printed below
203 int startTime = time(NULL);
206 ofstream distFile(dFileName.c_str(), ios::trunc);
// NOTE(review): setf(flags, mask) only sets the bits of `flags` that lie inside `mask`; with
// mask = ios::showpoint this clears showpoint and does not set ios::fixed, so setprecision(4)
// applies to the default float format. Probably setf(ios::fixed, ios::floatfield) was meant;
// changing it would alter the output format, so confirm before fixing.
207 distFile.setf(ios::fixed, ios::showpoint);
208 distFile << setprecision(4);
210 ofstream philFile(pFilename.c_str(), ios::trunc);
211 philFile.setf(ios::fixed, ios::showpoint);
212 philFile << setprecision(4);
214 for(int i=startLine;i<endLine;i++){
216 for(int j=0;j<i;j++){
217 distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
218 double dist = distCalculator->getDist();
221 distFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl;
223 if (isTrue(phylip) == true) { philFile << dist << '\t'; }
// end of row i in the phylip file
227 if (isTrue(phylip) == true) { philFile << endl; }
// progress: row index and elapsed seconds (the condition guarding this line is not visible)
230 cout << i << '\t' << time(NULL) - startTime << endl;
234 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
// the ofstream above always creates pFilename, so drop it when no phylip output is wanted
236 if (isTrue(phylip) != true) { remove(pFilename.c_str()); }
243 catch(exception& e) {
244 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
248 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
254 /**************************************************************************************************/
// Appends the contents of `temp` to the end of `filename`, one line at a time.
//   temp     - file to read, opened with openInputFile()
//   filename - file to extend, opened with openOutputFileAppend()
// `input`, `output` and `line` are declared on lines that are not visible in this view.
255 void DistanceCommand::appendFiles(string temp, string filename) {
260 //open output file in append mode
261 openOutputFileAppend(filename, output);
263 //open temp file for reading
264 openInputFile(temp, input);
267 //read input file and write to output file
// NOTE(review): eof() is tested before the read, so the last getline can fail and leave `line`
// empty; whether that adds a blank line depends on a line that may sit between the read and
// the write and is not visible here -- confirm.
268 while(input.eof() != true) {
269 getline(input, line); //getline removes the newline char
271 output << line << endl; // Appending back newline char
278 catch(exception& e) {
279 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
283 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
287 /**************************************************************************************************/