countends = globaldata->getCountEnds();
convert(globaldata->getProcessors(), processors);
convert(globaldata->getCutOff(), cutoff);
+ phylip = globaldata->getPhylipFile();
+
+ //open file
+ string filename = globaldata->getFastaFile();
+ openInputFile(filename, in);
+
+
int i;
- if (countends == "T") {
+ if (isTrue(countends) == true) {
for (i=0; i<globaldata->Estimators.size(); i++) {
if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
if (globaldata->Estimators[i] == "nogaps") {
}else if (globaldata->Estimators[i] == "eachgap") {
distCalculator = new eachGapDist();
}else if (globaldata->Estimators[i] == "onegap") {
- distCalculator = new oneGapDist(); }
+ distCalculator = new oneGapDist(); }
}
}
}else {
}
}
}
-
+
//reset calc for next command
globaldata->setCalc("");
}
int DistanceCommand::execute(){
try {
- //read file
- string filename = globaldata->inputFileName;
-
- if(globaldata->getFastaFile() != "") {
- readSeqs = new ReadFasta(filename); }
- else if(globaldata->getNexusFile() != "") {
- readSeqs = new ReadNexus(filename); }
- else if(globaldata->getClustalFile() != "") {
- readSeqs = new ReadClustal(filename); }
- else if(globaldata->getPhylipFile() != "") {
- readSeqs = new ReadPhylip(filename); }
-
- readSeqs->read();
- seqDB = readSeqs->getDB();
-
+ //reads fasta file and fills sequenceDB
+ if(globaldata->getFastaFile() != "") { seqDB = new SequenceDB(in); }
+ else { cout << "Error no fasta file." << endl; return 0; }
+
int numSeqs = seqDB->getNumSeqs();
cutoff += 0.005;
-
- string distFile = getRootName(globaldata->getFastaFile()) + "dist";
+ string phylipFile = "";
+ string distFile = getRootName(globaldata->getFastaFile()) + "dist";
remove(distFile.c_str());
+ //doses the user want the phylip formatted file as well
+ if (isTrue(phylip) == true) {
+ phylipFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
+ remove(phylipFile.c_str());
+
+ //output numSeqs to phylip formatted dist file
+ openOutputFile(phylipFile, phylipOut);
+ phylipOut << numSeqs << endl;
+ phylipOut.close();
+ }
+
+
//# if defined (_WIN32)
- //figure out how to implement the fork and wait commands in windows
- // driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
+ //figure out how to implement the fork and wait commands in windows
+ // driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
//# endif
- # if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
- driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
- }
- else if(processors == 2){
-
- int pid = fork();
- if(pid > 0){
- driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);
- appendFiles((distFile+"tempa"), distFile);
- remove((distFile + "tempa").c_str());
- }
- else{
- driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);
- appendFiles((distFile+"tempb"), distFile);
- remove((distFile + "tempb").c_str());
- }
- wait(NULL);
-
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ //if you don't need to fork anything
+ if(processors == 1){
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
+
+ if (isTrue(phylip) == true) {
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((phylipFile + "tempPhylipA").c_str());
}
- else if(processors == 3){
- int pid1 = fork();
- if(pid1 > 0){
- int pid2 = fork();
- if(pid2 > 0){
- driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
- appendFiles(distFile+"tempa", distFile);
- remove((distFile + "tempa").c_str());
- }
- else{
- driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);
- appendFiles(distFile+"tempb", distFile);
- remove((distFile + "tempb").c_str());
- }
- wait(NULL);
- }
- else{
- driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);
- appendFiles(distFile+"tempc", distFile);
- remove((distFile + "tempc").c_str());
- }
- wait(NULL);
+ }else{ //you have multiple processors
+
+ for (int i = 0; i < processors; i++) {
+ lines.push_back(new linePair());
+ lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs);
+ lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
}
- else if(processors == 4){
- int pid1 = fork();
- if(pid1 > 0){
- int pid2 = fork();
- if(pid2 > 0){
- driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);
- appendFiles(distFile+"tempa", distFile);
- remove((distFile + "tempa").c_str());
- }
- else{
- driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);
- appendFiles(distFile+"tempb", distFile);
- remove((distFile + "tempb").c_str());
- }
- wait(NULL);
- }
- else{
- int pid3 = fork();
- if(pid3 > 0){
- driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);
- appendFiles(distFile+"tempc", distFile);
- remove((distFile + "tempc").c_str());
- }
- else{
- driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);
- appendFiles(distFile+"tempd", distFile);
- remove((distFile + "tempd").c_str());
- }
- wait(NULL);
+
+ cout << lines[0]->start << '\t' << lines[0]->end << endl;
+ cout << lines[1]->start << '\t' << lines[1]->end << endl;
+
+ createProcesses(distFile, phylipFile);
+
+ //append and remove temp files
+ for (it = processIDS.begin(); it != processIDS.end(); it++) {
+ appendFiles((distFile + toString(it->second) + ".temp"), distFile);
+ remove((distFile + toString(it->second) + ".temp").c_str());
+
+ if (isTrue(phylip) == true) {
+ appendFiles((phylipFile + toString(it->second) + ".temp"), phylipFile);
+ remove((phylipFile + toString(it->second) + ".temp").c_str());
}
- wait(NULL);
}
- wait(NULL);
- # else
- driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
- # endif
-
+ }
+#else
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
+
+ if (isTrue(phylip) = true) {
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((phylipFile + "tempPhylipA").c_str());
+ }
+#endif
+
delete distCalculator;
-
+
return 0;
-
+
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
}
+/**************************************************************************************************/
+// Forks one child per entry of `lines` (count given by `processors`). Each child
+// runs driver() over its own [start, end) row range, writing to
+// "<column><child pid>.temp" and "<phylip><child pid>.temp", then exits.
+// The parent records every child pid in processIDS and blocks until all of
+// them have finished.
+//
+// column - base name of the column-formatted distance file
+// phylip - base name of the phylip-formatted distance file
+//
+// Any failure (fork error or exception) is reported on stdout and ends the
+// program with a non-zero exit status.
+void DistanceCommand::createProcesses(string column, string phylip) {
+    try {
+        int process = 0;
+        processIDS.clear();
+
+        //loop through and create all the processes you want
+        while (process != processors) {
+            int pid = fork();
+
+            if (pid > 0) {
+                //key by spawn order: row ranges ascend with the index, so iterating the map
+                //still appends files in the correct order, and two ranges that share the
+                //same end line can no longer overwrite each other's pid
+                processIDS[process] = pid;
+                process++;
+            }else if (pid == 0){
+                driver(distCalculator, seqDB, lines[process]->start, lines[process]->end, column + toString(getpid()) + ".temp", phylip + toString(getpid()) + ".temp", cutoff);
+                exit(0);
+            }else {
+                //fork failed: report it as a failure to the caller's shell
+                cout << "unable to spawn the necessary processes." << endl;
+                exit(1);
+            }
+        }
+
+        //force parent to wait until all the processes are done
+        for (it = processIDS.begin(); it != processIDS.end(); it++) {
+            int status = 0;
+            waitpid(it->second, &status, 0);
+        }
+
+    }
+    catch(exception& e) {
+        cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+        exit(1);
+    }
+    catch(...) {
+        cout << "An unknown error has occurred in the DistanceCommand class function createProcesses. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+        exit(1);
+    }
+}
/**************************************************************************************************/
/////// need to fix to work with calcs and sequencedb
-int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
+// Calculates the distance between every sequence i in [startLine, endLine) and
+// every sequence j < i.
+//
+// distCalculator - calculator used for each pair (calcDist, then getDist)
+// align          - sequence database the pairs are read from
+// startLine      - first row to process (inclusive)
+// endLine        - last row to process (exclusive)
+// dFileName      - column-formatted output; gets "name_i name_j dist" for each pair with dist <= cutoff
+// pFilename      - phylip-formatted output; gets one tab-separated row per i when the phylip
+//                  option is true, otherwise the file is deleted again before returning
+// cutoff         - largest distance written to the column file
+//
+// Both files are truncated on open. Progress (row number and elapsed seconds) is
+// printed to stdout every 100 rows and once at the end. Returns 1.
+int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, string pFilename, float cutoff){
try {
+
int startTime = time(NULL);
-
+
+ //column file
ofstream distFile(dFileName.c_str(), ios::trunc);
distFile.setf(ios::fixed, ios::showpoint);
distFile << setprecision(4);
-
- for(int i=startLine;i<endLine;i++){
+ ofstream philFile(pFilename.c_str(), ios::trunc);
+ philFile.setf(ios::fixed, ios::showpoint);
+ philFile << setprecision(4);
+
+ //the option cannot change while this function runs, so evaluate it once instead of once per pair
+ const bool wantPhylip = (isTrue(phylip) == true);
+
+ for(int i=startLine;i<endLine;i++){
+
for(int j=0;j<i;j++){
- distCalculator->calcDist(align->get(i), align->get(j));
+ distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
double dist = distCalculator->getDist();
-
+
if(dist <= cutoff){
- distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
+ distFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl;
}
-
+ if (wantPhylip) { philFile << dist << '\t'; }
+
}
+
+ if (wantPhylip) { philFile << endl; }
+
if(i % 100 == 0){
cout << i << '\t' << time(NULL) - startTime << endl;
}
-
+
}
cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
-
+
+ //close before removing: deleting a file that is still open fails on some platforms
+ philFile.close();
+ distFile.close();
+
+ if (!wantPhylip) { remove(pFilename.c_str()); }
+
return 1;
}
catch(exception& e) {
cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
-
+
}
/**************************************************************************************************/
try{
ofstream output;
ifstream input;
-
+
//open output file in append mode
openOutputFileAppend(filename, output);
output << line << endl; // Appending back newline char
}
}
-
+
input.close();
output.close();
}