try {
globaldata = GlobalData::getInstance();
validCalculator = new ValidCalculators();
- ends = globaldata->getEnds();
+ countends = globaldata->getCountEnds();
convert(globaldata->getProcessors(), processors);
convert(globaldata->getCutOff(), cutoff);
+ //open file
+ string filename = globaldata->getFastaFile();
+ openInputFile(filename, in);
+
+
int i;
- if (ends != "T") {
+ if (isTrue(countends) == true) {
for (i=0; i<globaldata->Estimators.size(); i++) {
if (validCalculator->isValidCalculator("distance", globaldata->Estimators[i]) == true) {
if (globaldata->Estimators[i] == "nogaps") {
}else if (globaldata->Estimators[i] == "eachgap") {
distCalculator = new eachGapDist();
}else if (globaldata->Estimators[i] == "onegap") {
- distCalculator = new oneGapDist(); }
+ distCalculator = new oneGapDist(); }
}
}
}else {
}
}
}
-
+
//reset calc for next command
globaldata->setCalc("");
}
// NOTE: this excerpt is a patch hunk. Lines beginning with '-' are the removed
// (old) code; lines beginning with '+' are the added (new) code.
//
// Runs the distance calculation (new version):
//   1. builds seqDB from the stream `in` when a fasta file name is set;
//      otherwise prints an error and returns 0
//   2. removes earlier outputs named getRootName(fasta) + "dist" and
//      getRootName(fasta) + "phylip.dist"
//   3. writes numSeqs as the first line of the phylip file
//   4. calls driver() over row ranges, in this process or in forked processes
//      depending on `processors` and the platform, then passes each temp file
//      to appendFiles() (presumably appends it to the final output) and removes it
//   5. deletes distCalculator and returns 0
// The exception handler that follows the try block continues past this excerpt.
int DistanceCommand::execute(){
try {
- //read file
- string filename = globaldata->inputFileName;
-
- if(globaldata->getFastaFile() != "") {
- readSeqs = new ReadFasta(filename); }
- else if(globaldata->getNexusFile() != "") {
- readSeqs = new ReadNexus(filename); }
- else if(globaldata->getClustalFile() != "") {
- readSeqs = new ReadClustal(filename); }
- else if(globaldata->getPhylipFile() != "") {
- readSeqs = new ReadPhylip(filename); }
-
- readSeqs->read();
- seqDB = readSeqs->getDB();
-
+ //reads fasta file and fills sequenceDB
+ if(globaldata->getFastaFile() != "") { seqDB = new SequenceDB(in); }
+ else { cout << "Error no fasta file." << endl; return 0; }
+
int numSeqs = seqDB->getNumSeqs();
// NOTE(review): cutoff is not a local of this function, so this increment also
// affects any later use of cutoff. The reason for +0.005 is not shown here
// (presumably rounding slack) -- TODO confirm.
+ cutoff += 0.005;
string distFile = getRootName(globaldata->getFastaFile()) + "dist";
+ string phylipFile = getRootName(globaldata->getFastaFile()) + "phylip.dist";
+ remove(phylipFile.c_str());
remove(distFile.c_str());
- // # if defined (WIN_VERSION)
- // driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
- // # endif
+ //output numSeqs to phylip formatted dist file
+ openOutputFile(phylipFile, phylipOut);
+ phylipOut << numSeqs << endl;
+ phylipOut.close();
- // # if defined (LINUX_VERSION)
- if(processors == 1){
- driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
- }
- else if(processors == 2){
+ //# if defined (_WIN32)
+ //figure out how to implement the fork and wait commands in windows
+ // driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile, cutoff);
+ //# endif
- int pid = fork();
- if(pid > 0){
- driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);
- appendFiles((distFile+"tempa"), distFile);
- remove((distFile + "tempa").c_str());
// Work split: driver() compares row i against every j < i, so the cost of rows
// [a,b) grows with b*b - a*a. The boundaries in the Apple branch sit at
// numSeqs*sqrt(k/P) (e.g. numSeqs/sqrt(2) for P=2), presumably so that each of
// the P processes gets a similar share of the comparisons.
// After fork(), the "pid > 0" branch runs in the parent; the else branch runs in
// the child and is also taken when fork() fails (pid < 0).
// NOTE(review): no child branch shown here exits, so each child carries on
// through the trailing wait calls, "delete distCalculator" and "return 0".
// NOTE(review): every process hands its temp files to appendFiles() as soon as
// its own driver() call returns, so the order of the sections in the final
// files depends on which process finishes first.
+#if defined (__APPLE__) || (__MACH__)
+ if(processors == 1){
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((phylipFile + "tempPhylipA").c_str());
+ }
+ else if(processors == 2){
+
+ int pid = fork();
+ if(pid > 0){
+ driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
+ appendFiles((distFile+"tempa"), distFile);
+ remove((distFile + "tempa").c_str());
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((phylipFile + "tempPhylipA").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
+ appendFiles((distFile+"tempb"), distFile);
+ remove((distFile + "tempb").c_str());
+ appendFiles((phylipFile + "tempPhylipB"), phylipFile);
+ remove((phylipFile + "tempPhylipB").c_str());
+ }
+ wait(NULL);
+
+ }
+ else if(processors == 3){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
+ appendFiles(distFile+"tempa", distFile);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((distFile + "tempa").c_str());
+ remove((phylipFile + "tempPhylipA").c_str());
}
else{
- driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);
- appendFiles((distFile+"tempb"), distFile);
+ driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
+ appendFiles(distFile+"tempb", distFile);
+ appendFiles((phylipFile + "tempPhylipB"), phylipFile);
remove((distFile + "tempb").c_str());
+ remove((phylipFile + "tempPhylipB").c_str());
}
wait(NULL);
-
}
- else if(processors == 3){
- int pid1 = fork();
- if(pid1 > 0){
- int pid2 = fork();
- if(pid2 > 0){
- driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
- appendFiles(distFile+"tempa", distFile);
- remove((distFile + "tempa").c_str());
- }
- else{
- driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);
- appendFiles(distFile+"tempb", distFile);
- remove((distFile + "tempb").c_str());
- }
- wait(NULL);
+ else{
+ driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ appendFiles((phylipFile + "tempPhylipC"), phylipFile);
+ remove((distFile + "tempc").c_str());
+ remove((phylipFile + "tempPhylipC").c_str());
+ }
+ wait(NULL);
+ }
+ else if(processors == 4){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
+ appendFiles(distFile+"tempa", distFile);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((distFile + "tempa").c_str());
+ remove((phylipFile + "tempPhylipA").c_str());
}
else{
- driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);
- appendFiles(distFile+"tempc", distFile);
- remove((distFile + "tempc").c_str());
+ driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
+ appendFiles(distFile+"tempb", distFile);
+ appendFiles((phylipFile + "tempPhylipB"), phylipFile);
+ remove((distFile + "tempb").c_str());
+ remove((phylipFile + "tempPhylipB").c_str());
}
wait(NULL);
}
- else if(processors == 4){
- int pid1 = fork();
- if(pid1 > 0){
- int pid2 = fork();
- if(pid2 > 0){
- driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);
- appendFiles(distFile+"tempa", distFile);
- remove((distFile + "tempa").c_str());
- }
- else{
- driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);
- appendFiles(distFile+"tempb", distFile);
- remove((distFile + "tempb").c_str());
- }
- wait(NULL);
+ else{
+ int pid3 = fork();
+ if(pid3 > 0){
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ appendFiles((phylipFile + "tempPhylipC"), phylipFile);
+ remove((distFile + "tempc").c_str());
+ remove((phylipFile + "tempPhylipC").c_str());
+
}
else{
- int pid3 = fork();
- if(pid3 > 0){
- driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);
- appendFiles(distFile+"tempc", distFile);
- remove((distFile + "tempc").c_str());
- }
- else{
- driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);
- appendFiles(distFile+"tempd", distFile);
- remove((distFile + "tempd").c_str());
- }
- wait(NULL);
+ driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", phylipFile + "tempPhylipD", cutoff);
+ appendFiles(distFile+"tempd", distFile);
+ appendFiles((phylipFile + "tempPhylipD"), phylipFile);
+ remove((distFile + "tempd").c_str());
+ remove((phylipFile + "tempPhylipD").c_str());
}
wait(NULL);
}
wait(NULL);
- //# endif
-
+ }
+ wait(NULL);
// NOTE(review): the if / else-if chain above has no final else, so for a
// processors value outside 1..4 driver() is never called and the outputs hold
// only what was written before the chain. The Linux chain below has the same shape.
+#elif (linux) || (__linux)
// NOTE(review): this branch calls wait() with no argument, whereas the Apple
// branch calls wait(NULL); POSIX declares pid_t wait(int *) -- confirm this
// compiles against the wait declaration that is in scope.
// NOTE(review): for processors == 3 and == 4 the ranges below reuse the
// two-process bounds (0..n/sqrt(2) and n/sqrt(2)..n) and then add ranges that
// start at sqrt(6)*n/3 and sqrt(3)*n/2 and also end at numSeqs, so the later
// rows are computed more than once. They differ from the Apple branch bounds --
// looks like a copy/paste slip; confirm the intended bounds.
+ if(processors == 1){
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((phylipFile + "tempPhylipA").c_str());
+ }
+ else if(processors == 2){
+
+ int pid = fork();
+ if(pid > 0){
+ driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
+ appendFiles((distFile+"tempa"), distFile);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((distFile + "tempa").c_str());
+ remove((phylipFile + "tempPhylipA").c_str());
+
+ }
+ else{
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
+ appendFiles((distFile+"tempb"), distFile);
+ appendFiles((phylipFile + "tempPhylipB"), phylipFile);
+ remove((distFile + "tempb").c_str());
+ remove((phylipFile + "tempPhylipB").c_str());
+ }
+ wait();
+
+ }
+ else if(processors == 3){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
+ appendFiles((distFile+"tempa"), distFile);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((distFile + "tempa").c_str());
+ remove((phylipFile + "tempPhylipA").c_str());
+
+ }
+ else{
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
+ appendFiles((distFile+"tempb"), distFile);
+ appendFiles((phylipFile + "tempPhylipB"), phylipFile);
+ remove((distFile + "tempb").c_str());
+ remove((phylipFile + "tempPhylipB").c_str());
+ }
+ wait();
+ }
+ else{
+ driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ appendFiles((phylipFile + "tempPhylipC"), phylipFile);
+ remove((distFile + "tempc").c_str());
+ remove((phylipFile + "tempPhylipC").c_str());
+ }
+ wait();
+ }
+ else if(processors == 4){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", phylipFile + "tempPhylipA", cutoff);
+ appendFiles((distFile+"tempa"), distFile);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((distFile + "tempa").c_str());
+ remove((phylipFile + "tempPhylipA").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", phylipFile + "tempPhylipB", cutoff);
+ appendFiles((distFile+"tempb"), distFile);
+ appendFiles((phylipFile + "tempPhylipB"), phylipFile);
+ remove((distFile + "tempb").c_str());
+ remove((phylipFile + "tempPhylipB").c_str());
+ }
+ wait();
+ }
+ else{
+ int pid3 = fork();
+ if(pid3 > 0){
+ driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", phylipFile + "tempPhylipC", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ appendFiles((phylipFile + "tempPhylipC"), phylipFile);
+ remove((distFile + "tempc").c_str());
+ remove((phylipFile + "tempPhylipC").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", phylipFile + "tempPhylipD", cutoff);
+ appendFiles(distFile+"tempd", distFile);
+ appendFiles((phylipFile + "tempPhylipD"), phylipFile);
+ remove((distFile + "tempd").c_str());
+ remove((phylipFile + "tempPhylipD").c_str());
+ }
+ wait();
+ }
+ wait();
+ }
+ wait();
+
// Any other platform: a single driver() call over all rows, no forking.
+#else
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, phylipFile + "tempPhylipA", cutoff);
+ appendFiles((phylipFile + "tempPhylipA"), phylipFile);
+ remove((phylipFile + "tempPhylipA").c_str());
+#endif
+
// Reached by every process that ran a branch above (see the fork note earlier).
delete distCalculator;
-
+
return 0;
-
+
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
/**************************************************************************************************/
/////// need to fix to work with calcs and sequencedb
// NOTE: this excerpt is a patch hunk. Lines beginning with '-' are the removed
// (old) code; lines beginning with '+' are the added (new) code.
//
// Computes the distance between each sequence i in [startLine, endLine) and
// every earlier sequence j < i, using distCalculator.
//   dFileName : truncated, then receives one "name_i name_j dist" line for each
//               pair whose dist <= cutoff
//   pFilename : (new version only) truncated, then receives one line per i that
//               holds every dist for j < i, each followed by a tab
// Prints i and the elapsed seconds to cout whenever i is a multiple of 100, and
// endLine-1 with the elapsed seconds once at the end.
// Returns 1. The handler shown prints a message and calls exit(1).
-int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
+int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, string pFilename, float cutoff){
try {
int startTime = time(NULL);
-
+
+ //column file
ofstream distFile(dFileName.c_str(), ios::trunc);
// NOTE(review): the two-argument setf(flags, mask) clears the mask bits and then
// sets only (flags & mask). ios::fixed is not part of ios::showpoint, so this
// call clears showpoint and does not turn on fixed notation;
// setf(ios::fixed, ios::floatfield) is probably what was intended -- confirm.
// The same applies to the philFile call below.
distFile.setf(ios::fixed, ios::showpoint);
distFile << setprecision(4);
-
- for(int i=startLine;i<endLine;i++){
+ //second output file (pFilename): one tab-separated row of distances per i
+ ofstream philFile(pFilename.c_str(), ios::trunc);
+ philFile.setf(ios::fixed, ios::showpoint);
+ philFile << setprecision(4);
+
// Neither stream is checked for a failed open before the loop starts.
+ for(int i=startLine;i<endLine;i++){
+
// Row i is compared against rows 0..i-1 only; for i == 0 this loop does not run
// and the row written to philFile is an empty line.
for(int j=0;j<i;j++){
-//cout << "unaligned" << endl;
-//cout << align->get(i).getUnaligned() << " " << align->get(j).getUnaligned() << endl;
-//cout << "aligned" << endl;
-//cout << align->get(i).getAligned() << " " << align->get(j).getAligned() << endl;
- distCalculator->calcDist(align->get(i), align->get(j));
+ distCalculator->calcDist(*(align->get(i)), *(align->get(j)));
double dist = distCalculator->getDist();
-
+
// Only pairs within the cutoff go to the column file; every pair goes to philFile.
if(dist <= cutoff){
- distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
+ distFile << align->get(i)->getName() << ' ' << align->get(j)->getName() << ' ' << dist << endl;
}
-
+ philFile << dist << '\t';
}
+
+ philFile << endl;
+
// Progress report: row index and seconds elapsed since startTime.
if(i % 100 == 0){
cout << i << '\t' << time(NULL) - startTime << endl;
}
-
+
}
cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
-
+
// The explicit close calls are left commented out, so neither stream is closed
// explicitly in this function.
+ //philFile.close();
+ //distFile.close();
+
return 1;
}
// NOTE(review): as shown, this handler receives e but reports an "unknown"
// error without printing e.what(); lines may be missing from this excerpt --
// confirm against the full file.
catch(exception& e) {
cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
-
+
}
/**************************************************************************************************/
output << line << endl; // Appending back newline char
}
}
-
+
input.close();
output.close();
}