X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=unifracweightedcommand.cpp;h=4883c48741717c677641c6b48bf7fe4ada0245f1;hp=541131efd5b15401c229a21966655d39e320a496;hb=18ccb5ee181cc69d8f8e9cbf178d7751e6199da5;hpb=006601d68abe8d0061f77e8d28323b160750e343

diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp
index 541131e..4883c48 100644
--- a/unifracweightedcommand.cpp
+++ b/unifracweightedcommand.cpp
@@ -15,20 +15,20 @@
 //**********************************************************************************************************************
 vector<string> UnifracWeightedCommand::setParameters(){	
 	try {
-		CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
-        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
-		CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
-		CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-		CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
-		CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-        CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
-        CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
-        CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
-		CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
-		CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
-		CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+		CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+		CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+		CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+		CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+		CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+        CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
+        CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
+		CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
+		CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
+		CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
 		
 		vector<string> myArray;
 		for (int i = 0; i < parameters.size(); i++) {	myArray.push_back(parameters[i].name);		}
@@ -65,28 +65,22 @@ string UnifracWeightedCommand::getHelpString(){
 	}
 }
 //**********************************************************************************************************************
-string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){	
-	try {
-        string outputFileName = "";
-		map<string, vector<string> >::iterator it;
+string UnifracWeightedCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        if (type == "weighted")            {  pattern = "[filename],weighted-[filename],[tag],weighted";   }
+        else if (type == "wsummary")        {  pattern = "[filename],wsummary";   }
+        else if (type == "phylip")           {  pattern = "[filename],[tag],[tag2],dist";   }
+        else if (type == "column")           {  pattern = "[filename],[tag],[tag2],dist";   }
+        else if (type == "tree")             {  pattern = "[filename],[tag],[tag2],tre";   }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "weighted")            {   outputFileName =  "weighted";   }
-            else if (type == "wsummary")        {   outputFileName =  "wsummary";   }
-            else if (type == "phylip")           {   outputFileName =  "dist";   }
-            else if (type == "column")           {   outputFileName =  "dist";   }
-            else if (type == "tree")             {   outputFileName =  "tre";   }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-	}
-	catch(exception& e) {
-		m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
-		exit(1);
-	}
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern");
+        exit(1);
+    }
 }
 //**********************************************************************************************************************
 UnifracWeightedCommand::UnifracWeightedCommand(){	
@@ -296,8 +290,10 @@ int UnifracWeightedCommand::execute() {
         delete reader;
         
         if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
-				
-		sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
+		
+        map<string, string> variables; 
+		variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+		sumFile = getOutputFileName("wsummary",variables);
 		m->openOutputFile(sumFile, outSum);
 		outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
 		
@@ -359,9 +355,11 @@ int UnifracWeightedCommand::execute() {
             vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
             
             if (random) {  
-                output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);  
-                outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
-                outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("weighted"));
+                variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+                variables["[tag]"] = toString(i+1);
+                string wFileName = getOutputFileName("weighted", variables);
+                output = new ColumnFile(wFileName, itersString);
+				outputNames.push_back(wFileName); outputTypes["weighted"].push_back(wFileName);
             } 
             
             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
@@ -410,7 +408,7 @@ int UnifracWeightedCommand::execute() {
                 delete newCt;
                 delete subSampleTree;
                 
-                if((thisIter+1) % 100 == 0){	m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();		}
+                if((thisIter+1) % 100 == 0){	m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); 	}
             }
             
             if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } return 0; }
@@ -467,40 +465,28 @@ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dist
         //we need to find the average distance and standard deviation for each groups distance
         
         //finds sum
-        vector<double> averages; averages.resize(numComp, 0); 
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
-            for (int i = 0; i < dists[thisIter].size(); i++) {  
-                averages[i] += dists[thisIter][i];
-            }
-        }
-        
-        //finds average.
-        for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
+        vector<double> averages = m->getAverages(dists);        
         
         //find standard deviation
-        vector<double> stdDev; stdDev.resize(numComp, 0);
-                
-        for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-            for (int j = 0; j < dists[thisIter].size(); j++) {
-                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
-            }
-        }
-        for (int i = 0; i < stdDev.size(); i++) {  
-            stdDev[i] /= (float) subsampleIters; 
-            stdDev[i] = sqrt(stdDev[i]);
-        }
+        vector<double> stdDev = m->getStandardDeviation(dists, averages);
         
         //make matrix with scores in it
-        vector< vector<double> > avedists;	avedists.resize(m->getNumGroups());
+        vector< vector<double> > avedists;	//avedists.resize(m->getNumGroups());
         for (int i = 0; i < m->getNumGroups(); i++) {
-            avedists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            avedists.push_back(temp);
         }
         
         //make matrix with scores in it
-        vector< vector<double> > stddists;	stddists.resize(m->getNumGroups());
+        vector< vector<double> > stddists;	//stddists.resize(m->getNumGroups());
         for (int i = 0; i < m->getNumGroups(); i++) {
-            stddists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            //stddists[i].resize(m->getNumGroups(), 0.0);
+            stddists.push_back(temp);
         }
+
         
         //flip it so you can print it
         int count = 0;
@@ -514,13 +500,18 @@ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dist
             }
         }
         
-        string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
+        map<string, string> variables; 
+		variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+        variables["[tag]"] = toString(treeNum+1);
+        variables["[tag2]"] = "weighted.ave";
+        string aveFileName = getOutputFileName("phylip",variables);
         if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
         else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
         ofstream out;
         m->openOutputFile(aveFileName, out);
         
-        string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
+        variables["[tag2]"] = "weighted.std";
+        string stdFileName = getOutputFileName("phylip",variables);
         if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
         else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
         ofstream outStd;
@@ -611,7 +602,11 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, i
         Tree* conTree = con.getTree(newTrees);
         
         //create a new filename
-        string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");				
+        map<string, string> variables; 
+		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
+        variables["[tag]"] = toString(treeNum+1);
+        variables["[tag2]"] = "weighted.cons";
+        string conFile = getOutputFileName("tree",variables);							
         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
         ofstream outTree;
         m->openOutputFile(conFile, outTree);
@@ -635,7 +630,11 @@ vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists
         vector<Tree*> trees;
         
         //create a new filename
-        string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");				
+        map<string, string> variables; 
+		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
+        variables["[tag]"] = toString(treeNum+1);
+        variables["[tag2]"] = "weighted.all";
+        string outputFile = getOutputFileName("tree",variables);				
         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
         
         ofstream outAll;
@@ -699,39 +698,32 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersS
         
         lines.clear();
         
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-        if(processors != 1){
-            int numPairs = namesOfGroupCombos.size();
-            int numPairsPerProcessor = numPairs / processors;
+        //breakdown work between processors
+        int numPairs = namesOfGroupCombos.size();
+        int numPairsPerProcessor = numPairs / processors;
             
-            for (int i = 0; i < processors; i++) {
-                int startPos = i * numPairsPerProcessor;
-                if(i == processors - 1){
-                    numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                }
-                lines.push_back(linePair(startPos, numPairsPerProcessor));
-            }
+        for (int i = 0; i < processors; i++) {
+            int startPos = i * numPairsPerProcessor;
+            if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+            lines.push_back(linePair(startPos, numPairsPerProcessor));
         }
-#endif
         
         
         //get scores for random trees
         for (int j = 0; j < iters; j++) {
-            cout << j << endl; 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-            if(processors == 1){
-                driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
-            }else{
+//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            //if(processors == 1){
+              //  driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
+           // }else{
                 createProcesses(thisTree,  namesOfGroupCombos, rScores);
-            }
-#else
-            driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
-#endif
+           // }
+//#else
+            //driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
+//#endif
+ 
             
             if (m->control_pressed) { delete ct;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } return 0; }
             
-            //report progress
-            //					m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();		
         }
         lines.clear();
         
@@ -767,12 +759,11 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersS
 
 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
 	try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-		int process = 1;
+        int process = 1;
 		vector<int> processIDS;
-		
 		EstOutput results;
-		
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 		//loop through and create all the processes you want
 		while (process != processors) {
 			int pid = fork();
@@ -818,9 +809,53 @@ int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > na
 			in.close();
 			m->mothurRemove(s);
 		}
+#else
+        //fill in functions
+        vector<weightedRandomData*> pDataArray;
+		DWORD   dwThreadIdArray[processors-1];
+		HANDLE  hThreadArray[processors-1];
+        vector<CountTable*> cts;
+        vector<Tree*> trees;
 		
-		return 0;
-#endif		
+		//Create processor worker threads.
+		for( int i=1; i<processors; i++ ){
+            CountTable* copyCount = new CountTable();
+            copyCount->copy(ct);
+            Tree* copyTree = new Tree(copyCount);
+            copyTree->getCopy(t);
+            
+            cts.push_back(copyCount);
+            trees.push_back(copyTree);
+            
+            vector< vector<double> > copyScores = rScores;
+            
+            weightedRandomData* tempweighted = new weightedRandomData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot, copyScores);
+			pDataArray.push_back(tempweighted);
+			processIDS.push_back(i);
+            
+			hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+		}
+		
+		driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
+		
+		//Wait until all threads have terminated.
+		WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+		
+		//Close all thread handles and free memory allocations.
+		for(int i=0; i < pDataArray.size(); i++){
+            for (int j = pDataArray[i]->start; j < (pDataArray[i]->start+pDataArray[i]->num); j++) {
+                scores[j].push_back(pDataArray[i]->scores[j][pDataArray[i]->scores[j].size()-1]);
+            }
+			delete cts[i];
+            delete trees[i];
+			CloseHandle(hThreadArray[i]);
+			delete pDataArray[i];
+		}
+
+		
+#endif	
+        
+        return 0;
 	}
 	catch(exception& e) {
 		m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
@@ -880,7 +915,7 @@ void UnifracWeightedCommand::printWeightedFile() {
 		for(int a = 0; a < numComp; a++) {
 			output->initFile(groupComb[a], tags);
 			//print each line
-			for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
+			for (map<double,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
 				data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
 				output->output(data);
 				data.clear();
@@ -943,14 +978,20 @@ void UnifracWeightedCommand::createPhylipFile() {
 		//for each tree
 		for (int i = 0; i < T.size(); i++) { 
 		
-			string phylipFileName;
-			if ((outputForm == "lt") || (outputForm == "square")) {
-				phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
-				outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
-			}else { //column
-				phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
-				outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
-			}
+            string phylipFileName;
+			map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+            variables["[tag]"] = toString(i+1);
+            if ((outputForm == "lt") || (outputForm == "square")) {
+                variables["[tag2]"] = "weighted.phylip";
+                phylipFileName = getOutputFileName("phylip",variables);
+                outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
+            }else { //column
+                variables["[tag2]"] = "weighted.column";
+                phylipFileName = getOutputFileName("column",variables);
+                outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
+            }
+
 			
 			ofstream out;
 			m->openOutputFile(phylipFileName, out);
@@ -1044,7 +1085,7 @@ void UnifracWeightedCommand::calculateFreqsCumuls() {
 		for (int f = 0; f < numComp; f++) {
 			for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
 				validScores[rScores[f][i]] = rScores[f][i];
-				map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
+				map<double,double>::iterator it = rScoreFreq[f].find(rScores[f][i]);
 				if (it != rScoreFreq[f].end()) {
 					rScoreFreq[f][rScores[f][i]]++;
 				}else{
@@ -1057,9 +1098,9 @@ void UnifracWeightedCommand::calculateFreqsCumuls() {
 		for(int a = 0; a < numComp; a++) {
 			float rcumul = 1.0000;
 			//this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
-			for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
+			for (map<double,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
 				//make rscoreFreq map and rCumul
-				map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
+				map<double,double>::iterator it2 = rScoreFreq[a].find(it->first);
 				rCumul[a][it->first] = rcumul;
 				//get percentage of random trees with that info
 				if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }