5 // Created by SarahsWork on 5/10/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "sparcccommand.h"
12 //**********************************************************************************************************************
13 vector<string> SparccCommand::setParameters(){
15 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
16 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter psamplings("samplings", "Number", "", "20", "", "", "","",false,false,false); parameters.push_back(psamplings);
19 CommandParameter piterations("iterations", "Number", "", "10", "", "", "","",false,false,false); parameters.push_back(piterations);
20 CommandParameter ppermutations("permutations", "Number", "", "1000", "", "", "","",false,false,false); parameters.push_back(ppermutations);
21 CommandParameter pmethod("method", "Multiple", "relabund-dirichlet", "dirichlet", "", "", "","",false,false); parameters.push_back(pmethod);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
23 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "SparccCommand", "setParameters");
36 //**********************************************************************************************************************
37 string SparccCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The sparcc command allows you to ....\n";
41 helpString += "The sparcc command parameters are: shared, groups, label, samplings, iterations, permutations, processors and method.\n";
42 helpString += "The samplings parameter is used to .... Default=20.\n";
43 helpString += "The iterations parameter is used to ....Default=10.\n";
44 helpString += "The permutations parameter is used to ....Default=1000.\n";
45 helpString += "The method parameter is used to ....Options are relabund and dirichlet. Default=dirichlet.\n";
46 helpString += "The default value for groups is all the groups in your sharedfile.\n";
47 helpString += "The label parameter is used to analyze specific labels in your shared file.\n";
48 helpString += "The sparcc command should be in the following format: sparcc(shared=yourSharedFile)\n";
49 helpString += "sparcc(shared=final.an.shared)\n";
53 m->errorOut(e, "SparccCommand", "getHelpString");
57 //**********************************************************************************************************************
58 string SparccCommand::getOutputPattern(string type) {
62 if (type == "corr") { pattern = "[filename],[distance],sparcc_correlation"; }
63 else if (type == "pvalue") { pattern = "[filename],[distance],sparcc_pvalue"; }
64 else if (type == "sparccrelabund") { pattern = "[filename],[distance],sparcc_relabund"; }
65 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
70 m->errorOut(e, "SparccCommand", "getOutputPattern");
74 //**********************************************************************************************************************
75 SparccCommand::SparccCommand(){
77 abort = true; calledHelp = true;
79 vector<string> tempOutNames;
80 outputTypes["corr"] = tempOutNames;
81 outputTypes["pvalue"] = tempOutNames;
82 outputTypes["sparccrelabund"] = tempOutNames;
85 m->errorOut(e, "SparccCommand", "SparccCommand");
89 //**********************************************************************************************************************
90 SparccCommand::SparccCommand(string option) {
92 abort = false; calledHelp = false;
95 //allow user to run help
96 if(option == "help") { help(); abort = true; calledHelp = true; }
97 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100 //valid paramters for this command
101 vector<string> myArray = setParameters();
103 OptionParser parser(option);
104 map<string,string> parameters = parser.getParameters();
106 ValidParameters validParameter;
107 map<string,string>::iterator it;
108 //check to make sure all parameters are valid for command
109 for (it = parameters.begin(); it != parameters.end(); it++) {
110 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
113 vector<string> tempOutNames;
114 outputTypes["corr"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos...
115 outputTypes["pvalue"] = tempOutNames;
116 outputTypes["sparccrelabund"] = tempOutNames;
118 //if the user changes the input directory command factory will send this info to us in the output parameter
119 string inputDir = validParameter.validFile(parameters, "inputdir", false);
120 if (inputDir == "not found"){ inputDir = ""; }
124 it = parameters.find("shared");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["shared"] = inputDir + it->second; }
133 //check for parameters
134 //get shared file, it is required
135 sharedfile = validParameter.validFile(parameters, "shared", true);
136 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
137 else if (sharedfile == "not found") {
138 //if there is a current shared file, use it
139 sharedfile = m->getSharedFile();
140 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
141 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
142 }else { m->setSharedFile(sharedfile); }
144 //if the user changes the output directory command factory will send this info to us in the output parameter
145 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
146 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
149 normalizeMethod = validParameter.validFile(parameters, "method", false);
150 if (normalizeMethod == "not found") { normalizeMethod = "dirichlet"; }
151 if ((normalizeMethod == "dirichlet") || (normalizeMethod == "relabund")) { }
152 else { m->mothurOut(normalizeMethod + " is not a valid method. Valid methods are dirichlet and relabund."); m->mothurOutEndLine(); abort = true; }
155 string temp = validParameter.validFile(parameters, "samplings", false); if (temp == "not found"){ temp = "20"; }
156 m->mothurConvert(temp, numSamplings);
158 if(normalizeMethod == "relabund"){ numSamplings = 1; }
160 temp = validParameter.validFile(parameters, "iterations", false); if (temp == "not found"){ temp = "10"; }
161 m->mothurConvert(temp, maxIterations);
163 temp = validParameter.validFile(parameters, "permutations", false); if (temp == "not found"){ temp = "1000"; }
164 m->mothurConvert(temp, numPermutations);
166 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
167 m->setProcessors(temp);
168 m->mothurConvert(temp, processors);
170 string groups = validParameter.validFile(parameters, "groups", false);
171 if (groups == "not found") { groups = ""; }
172 else { m->splitAtDash(groups, Groups); }
173 m->setGroups(Groups);
175 string label = validParameter.validFile(parameters, "label", false);
176 if (label == "not found") { label = ""; }
178 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
179 else { allLines = 1; }
184 catch(exception& e) {
185 m->errorOut(e, "SparccCommand", "SparccCommand");
189 //**********************************************************************************************************************
190 int SparccCommand::execute(){
193 if (abort == true) { if (calledHelp) { return 0; } return 2; }
195 int start = time(NULL);
197 InputData input(sharedfile, "sharedfile");
198 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
199 string lastLabel = lookup[0]->getLabel();
201 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
202 set<string> processedLabels;
203 set<string> userLabels = labels;
205 //as long as you are not at the end of the file or done wih the lines you want
206 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
208 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
210 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
212 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
216 processedLabels.insert(lookup[0]->getLabel());
217 userLabels.erase(lookup[0]->getLabel());
220 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
221 string saveLabel = lookup[0]->getLabel();
223 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
224 lookup = input.getSharedRAbundVectors(lastLabel);
225 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
229 processedLabels.insert(lookup[0]->getLabel());
230 userLabels.erase(lookup[0]->getLabel());
232 //restore real lastlabel to save below
233 lookup[0]->setLabel(saveLabel);
236 lastLabel = lookup[0]->getLabel();
237 //prevent memory leak
238 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
240 if (m->control_pressed) { return 0; }
242 //get next line to process
243 lookup = input.getSharedRAbundVectors();
246 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
248 //output error messages about any remaining user labels
249 set<string>::iterator it;
250 bool needToRun = false;
251 for (it = userLabels.begin(); it != userLabels.end(); it++) {
252 m->mothurOut("Your file does not include the label " + *it);
253 if (processedLabels.count(lastLabel) != 1) {
254 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
257 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
261 //run last label if you need to
262 if (needToRun == true) {
263 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
264 lookup = input.getSharedRAbundVectors(lastLabel);
266 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
270 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
273 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
275 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to process.");
276 m->mothurOutEndLine();
277 m->mothurOutEndLine();
279 //output files created by command
280 m->mothurOutEndLine();
281 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
282 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
283 m->mothurOutEndLine();
287 catch(exception& e) {
288 m->errorOut(e, "SparccCommand", "execute");
292 /**************************************************************************************************/
293 vector<vector<float> > SparccCommand::shuffleSharedVector(vector<vector<float> >& sharedVector){
295 int numGroups = (int)sharedVector.size();
296 int numOTUs = (int)sharedVector[0].size();
298 vector<vector<float> > shuffledVector = sharedVector;
300 for(int i=0;i<numGroups;i++){
301 for(int j=0;j<numOTUs;j++){
302 shuffledVector[i][j] = sharedVector[rand()%numGroups][j];
306 return shuffledVector;
308 catch(exception& e) {
309 m->errorOut(e, "SparccCommand", "execute");
313 //**********************************************************************************************************************
314 int SparccCommand::process(vector<SharedRAbundVector*>& lookup){
316 cout.setf(ios::fixed, ios::floatfield);
317 cout.setf(ios::showpoint);
319 vector<vector<float> > sharedVector;
320 vector<string> otuNames = m->currentBinLabels;
322 //fill sharedVector to pass to CalcSparcc
323 for (int i = 0; i < lookup.size(); i++) {
324 vector<int> abunds = lookup[i]->getAbundances();
326 for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); }
327 sharedVector.push_back(temp);
329 int numOTUs = (int)sharedVector[0].size();
330 int numGroups = lookup.size();
332 map<string, string> variables;
333 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
334 variables["[distance]"] = lookup[0]->getLabel();
337 string relAbundFileName = getOutputFileName("sparccrelabund", variables);
338 ofstream relAbundFile;
339 m->openOutputFile(relAbundFileName, relAbundFile);
340 outputNames.push_back(relAbundFileName); outputTypes["sparccrelabund"].push_back(relAbundFileName);
342 relAbundFile << "OTU\taveRelAbund\n";
343 for(int i=0;i<numOTUs;i++){
344 if (m->control_pressed) { relAbundFile.close(); return 0; }
346 double relAbund = 0.0000;
347 for(int j=0;j<numGroups;j++){
348 relAbund += sharedVector[j][i]/(double)lookup[j]->getNumSeqs();
350 relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl;
352 relAbundFile.close();
354 CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod);
355 vector<vector<float> > origCorrMatrix = originalData.getRho();
357 string correlationFileName = getOutputFileName("corr", variables);
358 ofstream correlationFile;
359 m->openOutputFile(correlationFileName, correlationFile);
360 outputNames.push_back(correlationFileName); outputTypes["corr"].push_back(correlationFileName);
361 correlationFile.setf(ios::fixed, ios::floatfield);
362 correlationFile.setf(ios::showpoint);
364 for(int i=0;i<numOTUs;i++){ correlationFile << '\t' << otuNames[i]; } correlationFile << endl;
365 for(int i=0;i<numOTUs;i++){
366 correlationFile << otuNames[i];
367 for(int j=0;j<numOTUs;j++){
368 correlationFile << '\t' << origCorrMatrix[i][j];
370 correlationFile << endl;
374 if(numPermutations != 0){
375 vector<vector<float> > pValues = createProcesses(sharedVector, origCorrMatrix);
377 if (m->control_pressed) { return 0; }
379 string pValueFileName = getOutputFileName("pvalue", variables);
381 m->openOutputFile(pValueFileName, pValueFile);
382 outputNames.push_back(pValueFileName); outputTypes["pvalue"].push_back(pValueFileName);
383 pValueFile.setf(ios::fixed, ios::floatfield);
384 pValueFile.setf(ios::showpoint);
386 for(int i=0;i<numOTUs;i++){ pValueFile << '\t' << otuNames[i]; } pValueFile << endl;
387 for(int i=0;i<numOTUs;i++){
388 pValueFile << otuNames[i];
389 for(int j=0;j<numOTUs;j++){
390 pValueFile << '\t' << pValues[i][j];
399 catch(exception& e) {
400 m->errorOut(e, "SparccCommand", "process");
404 //**********************************************************************************************************************
405 vector<vector<float> > SparccCommand::createProcesses(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix){
407 int numOTUs = sharedVector[0].size();
408 vector<vector<float> > pValues;
411 pValues = driver(sharedVector, origCorrMatrix, numPermutations);
413 //divide iters between processors
414 vector<int> procIters;
415 int numItersPerProcessor = numPermutations / processors;
417 //divide iters between processes
418 for (int h = 0; h < processors; h++) {
419 if(h == processors - 1){ numItersPerProcessor = numPermutations - h * numItersPerProcessor; }
420 procIters.push_back(numItersPerProcessor);
423 vector<int> processIDS;
426 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
428 //loop through and create all the processes you want
429 while (process != processors) {
433 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
436 pValues = driver(sharedVector, origCorrMatrix, procIters[process]);
438 //pass pvalues to parent
440 string tempFile = toString(getpid()) + ".pvalues.temp";
441 m->openOutputFile(tempFile, out);
444 for (int i = 0; i < pValues.size(); i++) {
445 for (int j = 0; j < pValues[i].size(); j++) {
446 out << pValues[i][j] << '\t';
456 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
457 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
463 pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
465 //force parent to wait until all the processes are done
466 for (int i=0;i<processIDS.size();i++) {
467 int temp = processIDS[i];
472 for (int i = 0; i < processIDS.size(); i++) {
474 string tempFile = toString(processIDS[i]) + ".pvalues.temp";
475 m->openInputFile(tempFile, in);
477 ////// to do ///////////
478 int numTemp; numTemp = 0;
480 for (int j = 0; j < pValues.size(); j++) {
481 for (int k = 0; k < pValues.size(); k++) {
482 in >> numTemp; m->gobble(in);
483 pValues[j][k] += numTemp;
487 in.close(); m->mothurRemove(tempFile);
492 vector<sparccData*> pDataArray;
493 DWORD dwThreadIdArray[processors-1];
494 HANDLE hThreadArray[processors-1];
496 //Create processor worker threads.
497 for( int i=1; i<processors; i++ ){
499 //make copy so we don't get access violations
500 vector< vector<float> > copySharedVector = sharedVector;
501 vector< vector<float> > copyOrig = origCorrMatrix;
503 sparccData* temp = new sparccData(m, procIters[i], copySharedVector, copyOrig, numSamplings, maxIterations, numPermutations, normalizeMethod);
504 pDataArray.push_back(temp);
505 processIDS.push_back(i);
507 hThreadArray[i-1] = CreateThread(NULL, 0, MySparccThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
511 pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
513 //Wait until all threads have terminated.
514 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
516 //Close all thread handles and free memory allocations.
517 for(int i=0; i < pDataArray.size(); i++){
518 for (int j = 0; j < pDataArray[i]->pValues.size(); j++) {
519 for (int k = 0; k < pDataArray[i]->pValues[j].size(); k++) {
520 pValues[j][k] += pDataArray[i]->pValues[j][k];
524 CloseHandle(hThreadArray[i]);
525 delete pDataArray[i];
530 for(int i=0;i<numOTUs;i++){
532 for(int j=0;j<i;j++){
533 pValues[i][j] /= (double)numPermutations;
534 pValues[j][i] = pValues[i][j];
540 catch(exception& e) {
541 m->errorOut(e, "SparccCommand", "createProcesses");
546 //**********************************************************************************************************************
547 vector<vector<float> > SparccCommand::driver(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix, int numPerms){
549 int numOTUs = sharedVector[0].size();
550 vector<vector<float> > sharedShuffled = sharedVector;
551 vector<vector<float> > pValues(numOTUs);
552 for(int i=0;i<numOTUs;i++){ pValues[i].assign(numOTUs, 0); }
554 for(int i=0;i<numPerms;i++){
555 if (m->control_pressed) { return pValues; }
556 sharedShuffled = shuffleSharedVector(sharedVector);
557 CalcSparcc permutedData(sharedShuffled, maxIterations, numSamplings, normalizeMethod);
558 vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
560 for(int j=0;j<numOTUs;j++){
561 for(int k=0;k<j;k++){
562 double randValue = permuteCorrMatrix[j][k];
563 double observedValue = origCorrMatrix[j][k];
564 if(observedValue >= 0 && randValue > observedValue) { pValues[j][k]++; }//this method seems to deflate the
565 else if(observedValue < 0 && randValue < observedValue){ pValues[j][k]++; }//pvalues of small rho values
568 if((i+1) % (int)(numPermutations * 0.05) == 0){ cout << i+1 << endl; }
573 catch(exception& e) {
574 m->errorOut(e, "SparccCommand", "driver");
578 //**********************************************************************************************************************