X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=summarycommand.cpp;h=43202e56cd2b86d7224a8b46b26526a511c6d55a;hb=bd27c2b0612942815b7417c79f7ee41f669a2a34;hp=85f0970f25930f563616c5fba9bc27fecda9cf98;hpb=050e1297eaf24fabbbe0e9b76c3a5acfb90eed7d;p=mothur.git diff --git a/summarycommand.cpp b/summarycommand.cpp index 85f0970..43202e5 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -38,19 +38,19 @@ //********************************************************************************************************************** vector SummaryCommand::setParameters(){ try { - CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist); - CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund); - CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund); - CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared); - CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "",true,false); parameters.push_back(pcalc); - CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund); - CommandParameter psize("size", "Number", "", "0", "", "", "",false,false); parameters.push_back(psize); - CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(plist); + CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(prabund); + CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(psabund); + CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(pshared); + CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); + CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize); + CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -84,7 +84,21 @@ string SummaryCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string SummaryCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "SummaryCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** SummaryCommand::SummaryCommand(){ try { @@ -254,7 +268,7 @@ SummaryCommand::SummaryCommand(string option) { else { subsample = false; subsampleSize = -1; } } - if (subsample == false) { iters = 1; } + if (subsample == false) { iters = 0; } else { //if you did not set a samplesize and are not using a sharedfile if ((subsampleSize == -1) && (format != "sharedfile")) { m->mothurOut("[ERROR]: If you want to subsample with a list, rabund or sabund file, you must provide the sample size. You can do this by setting subsample=yourSampleSize.\n"); abort=true; } @@ -288,13 +302,13 @@ int SummaryCommand::execute(){ numLines = 0; numCols = 0; - string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary"; - string fileNameAve = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "ave"; - string fileNameSTD = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "std"; - outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])); + string fileNameRoot = getOutputFileName("summary",variables); + variables["[tag]"] = "ave-std"; + string fileNameAve = getOutputFileName("summary",variables); + outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot); - - if (inputFileNames.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); groupIndex[fileNameRoot] = groups[p]; @@ -369,18 +383,14 @@ int SummaryCommand::execute(){ m->openOutputFile(fileNameRoot, outputFileHandle); outputFileHandle << "label"; - ofstream outAve, outSTD; + ofstream outAve; if (subsample) { m->openOutputFile(fileNameAve, outAve); - m->openOutputFile(fileNameSTD, outSTD); - outputNames.push_back(fileNameAve); outputTypes["ave"].push_back(fileNameAve); - outputNames.push_back(fileNameSTD); outputTypes["std"].push_back(fileNameSTD); - outAve << "label"; outSTD << "label"; + outputNames.push_back(fileNameAve); outputTypes["summary"].push_back(fileNameAve); + outAve << "label\tmethod"; outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint); - outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint); if (inputFileNames.size() > 1) { groupIndex[fileNameAve] = groups[p]; - groupIndex[fileNameSTD] = groups[p]; } } @@ -391,17 +401,17 @@ int SummaryCommand::execute(){ for(int i=0;igetCols() == 1){ outputFileHandle << '\t' << sumCalculators[i]->getName(); - if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); outSTD << '\t' << sumCalculators[i]->getName(); } + if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); } numCols++; } else{ outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; - if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; outSTD << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } + if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } numCols += 3; } } outputFileHandle << endl; - if (subsample) { outSTD << endl; outAve << endl; } + if (subsample) { outAve << endl; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; @@ -409,11 +419,11 @@ int SummaryCommand::execute(){ - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;igetLabel()) == 1){ @@ -421,9 +431,9 @@ int SummaryCommand::execute(){ processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel()); - process(sabund, outputFileHandle, outAve, outSTD); + process(sabund, outputFileHandle, outAve); - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;igetLabel()); userLabels.erase(sabund->getLabel()); - process(sabund, outputFileHandle, outAve, outSTD); + process(sabund, outputFileHandle, outAve); - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;igetSAbundVector(); } - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i::iterator it; @@ -473,15 +483,15 @@ int SummaryCommand::execute(){ sabund = input->getSAbundVector(lastLabel); m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); - process(sabund, outputFileHandle, outAve, outSTD); + process(sabund, outputFileHandle, outAve); - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i data -> values @@ -559,7 +569,7 @@ int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, o outputFileHandle << endl; if (subsample) { - outAve << sabund->getLabel() << '\t'; outStd << sabund->getLabel() << '\t'; + outAve << sabund->getLabel() << '\t' << "ave\t"; //find ave and std for this label and output //will need to modify the createGroupSummary to combine results and not mess with the .summary file. @@ -594,14 +604,15 @@ int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, o } } + outAve << endl << sabund->getLabel() << '\t' << "std\t"; for (int i = 0; i < stdDev.size(); i++) { //finds average. for (int j = 0; j < stdDev[i].size(); j++) { stdDev[i][j] /= (float) iters; stdDev[i][j] = sqrt(stdDev[i][j]); - outStd << stdDev[i][j] << '\t'; + outAve << stdDev[i][j] << '\t'; } } - outAve << endl; outStd << endl; + outAve << endl; } return 0; @@ -702,38 +713,37 @@ vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, //open each groups summary file vector newComboNames; - string newLabel = ""; + map > > files; + map filesTypesLabels; + map filesTypesNumLines; for (int i=0; igetExtension(outputNames[i]); - string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; - m->mothurRemove(combineFileName); //remove old file - vector thisFilesLines; ifstream temp; m->openInputFile(outputNames[i], temp); //read through first line - labels - string tempLabel; - if (i == 0) { //we want to save the labels to output below - for (int j = 0; j < numCols+1; j++) { - temp >> tempLabel; - - if (j == 1) { newLabel += "group\t" + tempLabel + '\t'; - }else{ newLabel += tempLabel + '\t'; } - } - }else{ for (int j = 0; j < numCols+1; j++) { temp >> tempLabel; } } + string labelsLine = m->getline(temp); + vector theseLabels = m->splitWhiteSpace(labelsLine); + + string newLabel = ""; + for (int j = 0; j < theseLabels.size(); j++) { + if (j == 1) { newLabel += "group\t" + theseLabels[j] + '\t'; + }else{ newLabel += theseLabels[j] + '\t'; } + } m->gobble(temp); + int stop = numLines; + if (theseLabels.size() != numCols+1) { stop = numLines*2; } //for each label - for (int k = 0; k < numLines; k++) { + for (int k = 0; k < stop; k++) { string thisLine = ""; string tempLabel; - for (int j = 0; j < numCols+1; j++) { + for (int j = 0; j < theseLabels.size(); j++) { temp >> tempLabel; //save for later @@ -748,6 +758,13 @@ vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, m->gobble(temp); } + string extension = m->getExtension(outputNames[i]); + if (theseLabels.size() != numCols+1) { extension = ".ave-std" + extension; } + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; + m->mothurRemove(combineFileName); //remove old file + filesTypesLabels[extension] = newLabel; + filesTypesNumLines[extension] = stop; + map > >::iterator itFiles = files.find(extension); if (itFiles != files.end()) { //add new files info to existing type files[extension][outputNames[i]] = thisFilesLines; @@ -775,10 +792,10 @@ vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, m->openOutputFile(combineFileName, out); //output label line to new file - out << newLabel << endl; + out << filesTypesLabels[extension] << endl; //for each label - for (int k = 0; k < numLines; k++) { + for (int k = 0; k < filesTypesNumLines[extension]; k++) { //grab summary data for each group for (map >::iterator itType = thisType.begin(); itType != thisType.end(); itType++) {