5 * Created by Sarah Westcott on 5/7/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "distancecommand.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13 #include "eachgapignore.h"
14 #include "onegapdist.h"
15 #include "onegapignore.h"
17 //**********************************************************************************************************************
// Constructor: reads the run settings from GlobalData, opens the fasta file on
// the member stream `in`, and selects the distance calculator that matches the
// requested estimator name.
// NOTE(review): this listing omits some original lines (the `try` opener, the
// branch between the two estimator loops, closing braces), so comments below
// are hedged wherever the controlling statement is not visible.
19 DistanceCommand::DistanceCommand(){
21 globaldata = GlobalData::getInstance();
22 validCalculator = new ValidCalculators();
// Settings are fetched from globaldata; convert() presumably parses them into
// the numeric members processors and cutoff -- confirm against its definition.
23 countends = globaldata->getCountEnds();
24 convert(globaldata->getProcessors(), processors);
25 convert(globaldata->getCutOff(), cutoff);
26 phylip = globaldata->getPhylipFile();
// Open the fasta file now; execute() later builds the SequenceDB from `in`.
29 string filename = globaldata->getFastaFile();
30 openInputFile(filename, in);
// countends is true: "eachgap"/"onegap" map to eachGapDist/oneGapDist.
// NOTE(review): every valid estimator in the list assigns distCalculator, so
// as far as the visible lines show, a later match replaces (and leaks) an
// earlier one.
35 if (isTrue(countends) == true) {
36 for (i=0; i<globaldata->Estimators.size(); i++) {
37 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
38 if (globaldata->Estimators[i] == "nogaps") {
39 distCalculator = new ignoreGaps();
40 }else if (globaldata->Estimators[i] == "eachgap") {
41 distCalculator = new eachGapDist();
42 }else if (globaldata->Estimators[i] == "onegap") {
43 distCalculator = new oneGapDist(); }
// Presumably the countends == false branch (the `else` is not visible here):
// "eachgap"/"onegap" map to the *IgnoreTermGapDist calculators instead, while
// "nogaps" still maps to ignoreGaps.
47 for (i=0; i<globaldata->Estimators.size(); i++) {
48 if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
49 if (globaldata->Estimators[i] == "nogaps") {
50 distCalculator = new ignoreGaps();
51 }else if (globaldata->Estimators[i] == "eachgap") {
52 distCalculator = new eachGapIgnoreTermGapDist();
53 }else if (globaldata->Estimators[i] == "onegap") {
54 distCalculator = new oneGapIgnoreTermGapDist();
60 //reset calc for next command
61 globaldata->setCalc("");
// Error reporting: the two messages below are printed to stdout; the first
// uses the caught std::exception `e` (its catch line is not visible here).
64 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
68 cout << "An unknown error has occurred in the DistanceCommand class function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72 //**********************************************************************************************************************
// Runs the distance calculation: loads the sequences, chooses the output file
// name/format, computes the distances (in one process or split across several
// forked processes on Unix-like builds), and merges the temporary result files
// into the final output file.
// Returns 0 when no fasta file is set; other return paths are not visible in
// this listing.
74 int DistanceCommand::execute(){
77 //reads fasta file and fills sequenceDB
78 if(globaldata->getFastaFile() != "") { seqDB = new SequenceDB(in); }
79 else { cout << "Error no fasta file." << endl; return 0; }
81 int numSeqs = seqDB->getNumSeqs();
86 //does the user want the phylip formatted file as well
87 if (isTrue(phylip) == true) {
// Phylip output goes to <root>phylip.dist; any stale file is removed first.
88 outputFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
89 remove(outputFile.c_str());
91 //output numSeqs to phylip formatted dist file
92 openOutputFile(outputFile, outFile);
93 outFile << numSeqs << endl;
95 }else { //user wants column format
// Column output goes to <root>dist; any stale file is removed first.
96 outputFile = getRootName(globaldata->getFastaFile()) + "dist";
97 remove(outputFile.c_str());
100 //# if defined (_WIN32)
101 //figure out how to implement the fork and wait commands in windows
102 // driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
106 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107 //if you don't need to fork anything
// Single-process path: compute every row into <outputFile>.temp, append it to
// the output file, then delete the temp file.
109 driver(distCalculator, seqDB, 0, numSeqs, outputFile + ".temp", cutoff);
110 appendFiles((outputFile + ".temp"), outputFile);
111 remove((outputFile + ".temp").c_str());
112 }else{ //you have multiple processors
// Split the rows [0, numSeqs) into one range per process. driver() compares
// row i against every j < i, so the cumulative work grows roughly with the
// square of the row index; placing boundaries at sqrt(k/processors) * numSeqs
// aims to give each process a similar number of comparisons.
114 for (int i = 0; i < processors; i++) {
115 lines.push_back(new linePair());
116 lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
117 lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
// Prints the first two ranges to stdout. lines[1] exists only when
// processors >= 2 -- presumably guaranteed by the branch condition, which is
// not visible here; confirm.
120 cout << lines[0]->start << '\t' << lines[0]->end << endl;
121 cout << lines[1]->start << '\t' << lines[1]->end << endl;
123 createProcesses(outputFile);
125 //append and remove temp files
// Each child wrote <outputFile><pid>.temp (see createProcesses). processIDS is
// keyed by the range's end line, so -- assuming it is an ordered map -- this
// loop appends the pieces in row order.
126 for (it = processIDS.begin(); it != processIDS.end(); it++) {
127 appendFiles((outputFile + toString(it->second) + ".temp"), outputFile);
128 remove((outputFile + toString(it->second) + ".temp").c_str());
// Presumably the fallback for other platforms (the `#else` is not visible
// here): same single-process sequence as above.
132 driver(distCalculator, seqDB, 0, numSeqs, outputFile + ".temp", cutoff);
133 appendFiles((outputFile + ".temp"), outputFile);
134 remove((outputFile + ".temp").c_str());
137 delete distCalculator;
// Error reporting: both messages are printed to stdout.
142 catch(exception& e) {
143 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
147 cout << "An unknown error has occurred in the DistanceCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
151 /**************************************************************************************************/
// Starts one worker per entry in `lines`; each worker runs driver() on its row
// range and writes to its own temp file, and the parent then waits on the
// recorded pids.
// @param filename  base name of the output file; each worker writes to
//                  filename + <its pid> + ".temp".
// NOTE(review): the fork() call, the parent/child branch conditions and the
// wait call are not visible in this listing; the comments below describe only
// the visible statements.
152 void DistanceCommand::createProcesses(string filename) {
157 //loop through and create all the processes you want
158 while (process != processors) {
162 processIDS[lines[process]->end] = pid; //create map from line number to pid so you can append files in correct order later
// Worker side: compute this process's row range into a temp file named after
// its own pid, matching the name execute() later reads via the pid map.
165 driver(distCalculator, seqDB, lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff);
// NOTE(review): this error path terminates with exit status 0.
167 }else { cout << "unable to spawn the necessary processes." << endl; exit(0); }
170 //force parent to wait until all the processes are done
171 for (it = processIDS.begin(); it != processIDS.end(); it++) {
// it->second is the pid stored in the loop above.
172 int temp = it->second;
// Error reporting: both messages are printed to stdout.
177 catch(exception& e) {
178 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182 cout << "An unknown error has occurred in the DistanceCommand class function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
187 /**************************************************************************************************/
188 /////// need to fix to work with calcs and sequencedb
// Computes pairwise distances for rows [startLine, endLine): each sequence i in
// that range is compared against every earlier sequence j < i, and the results
// are written to dFileName.
// @param distCalculator  calculator used for each pair (calcDist + getDist).
// @param align           sequence database; get(k) yields the k-th sequence.
// @param startLine       first row to process (inclusive).
// @param endLine         last row to process (exclusive).
// @param dFileName       output file; opened with ios::trunc.
// @param cutoff          not used in the visible lines -- presumably filters
//                        which distances are written in column format; confirm.
// NOTE(review): some original lines (conditions around the writes, closing
// braces, the return statement) are not visible in this listing.
189 int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
// Start time, used for the elapsed-time progress output below.
192 int startTime = time(NULL);
195 ofstream outFile(dFileName.c_str(), ios::trunc);
// NOTE(review): two-argument setf(flags, mask) applies only the bits of
// `flags` that fall inside `mask`; with mask ios::showpoint this call does not
// enable ios::fixed, so setprecision(4) may not mean "4 decimal places" --
// confirm the intended formatting before changing, since it alters the output.
196 outFile.setf(ios::fixed, ios::showpoint);
197 outFile << setprecision(4);
199 for(int i=startLine;i<endLine;i++){
201 for(int j=0;j<i;j++){
202 distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
203 double dist = distCalculator->getDist();
// Column format: one "<name i> <name j> <dist>" line per pair.
206 if (isTrue(phylip) != true) { outFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl; }
// Phylip format: tab-separated distances, with the row ended by the newline
// written below.
208 if (isTrue(phylip) == true) { outFile << dist << '\t'; }
212 if (isTrue(phylip) == true) { outFile << endl; }
// Progress output: row index and seconds elapsed since startTime.
215 cout << i << '\t' << time(NULL) - startTime << endl;
// Final progress line for the last row handled by this call.
219 cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
// Error reporting: both messages are printed to stdout.
226 catch(exception& e) {
227 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231 cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
237 /**************************************************************************************************/
// Appends the contents of one file to another, line by line.
// @param temp      file to read from.
// @param filename  file to append to (opened in append mode).
// NOTE(review): the declarations of `output`, `input` and `line`, and any
// close calls, are not visible in this listing.
238 void DistanceCommand::appendFiles(string temp, string filename) {
243 //open output file in append mode
244 openOutputFileAppend(filename, output);
246 //open temp file for reading
247 openInputFile(temp, input);
250 //read input file and write to output file
// NOTE(review): an eof()-controlled loop runs once more after the last line
// has been read; a line between the getline and the write is missing from this
// listing and may guard against emitting that extra empty line -- confirm
// before changing.
251 while(input.eof() != true) {
252 getline(input, line); //getline removes the newline char
254 output << line << endl; // Appending back newline char
// Error reporting: both messages are printed to stdout.
261 catch(exception& e) {
262 cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
266 cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
270 /**************************************************************************************************/