X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rarefactcommand.cpp;h=b8c1c6f1a6420f8a139eb5c71b0320b87277035e;hb=2bb9267aa4b4ecdf8488b06605cc9f3f36fa4332;hp=1c04bc4384fb632db099867821b6ffb3b4d79e95;hpb=e150b0b0664caec517485ee6d69dcdade6dcae77;p=mothur.git diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 1c04bc4..b8c1c6f 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -38,6 +38,7 @@ vector RareFactCommand::setParameters(){ CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "",true,false); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); + CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -63,6 +64,7 @@ string RareFactCommand::getHelpString(){ helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n"; helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n"; validCalculator.printCalc("rarefaction"); + helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n"; return helpString; @@ -184,22 +186,22 @@ RareFactCommand::RareFactCommand(string option) { listfile = validParameter.validFile(parameters, "list", true); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } - else { format = "list"; inputfile = listfile; } + else { format = "list"; inputfile = listfile; m->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund", true); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } - else { format = "sabund"; inputfile = sabundfile; } + else { format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund", true); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } - else { format = "rabund"; inputfile = rabundfile; } + else { format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); } sharedfile = validParameter.validFile(parameters, "shared", true); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } - else { format = "sharedfile"; inputfile = sharedfile; } + else { format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); } if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { //is there are current file available for any of these? @@ -243,20 +245,28 @@ RareFactCommand::RareFactCommand(string option) { if (calc == "default") { calc = "sobs"; } } m->splitAtDash(calc, Estimators); + if (m->inUsersGroups("citation", Estimators)) { + ValidCalculators validCalc; validCalc.printCitations(Estimators); + //remove citation from list of calcs + for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } + } string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } - convert(temp, freq); + m->mothurConvert(temp, freq); temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } - convert(temp, abund); + m->mothurConvert(temp, abund); temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } - convert(temp, nIters); + m->mothurConvert(temp, nIters); temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); + + temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; } + groupMode = m->isTrue(temp); } } @@ -277,14 +287,16 @@ int RareFactCommand::execute(){ if (m->control_pressed) { return 0; } + map nameMap; for (int p = 0; p < inputFileNames.size(); p++) { string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])); - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } if (inputFileNames.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); + nameMap[fileNameRoot] = groups[p]; } int i; ValidCalculators validCalculator; @@ -355,12 +367,12 @@ int RareFactCommand::execute(){ set processedLabels; set userLabels = labels; - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } //as long as you are not at the end of the file or done wih the lines you want while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(order->getLabel()) == 1){ @@ -398,7 +410,7 @@ int RareFactCommand::execute(){ order = (input->getOrderVector()); } - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set::iterator it; @@ -413,7 +425,7 @@ int RareFactCommand::execute(){ } } - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } //run last label if you need to if (needToRun == true) { @@ -435,7 +447,12 @@ int RareFactCommand::execute(){ } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //create summary file containing all the groups data for each label - this function just combines the info from the files already created. + if ((sharedfile != "") && (groupMode)) { outputNames = createGroupFile(outputNames, nameMap); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -450,6 +467,136 @@ int RareFactCommand::execute(){ } } //********************************************************************************************************************** +vector RareFactCommand::createGroupFile(vector& outputNames, map nameMap) { + try { + + vector newFileNames; + + //find different types of files + map > typesFiles; + for (int i = 0; i < outputNames.size(); i++) { + string extension = m->getExtension(outputNames[i]); + + ifstream in; + m->openInputFile(outputNames[i], in); + + string labels = m->getline(in); + string newLine = labels.substr(0, labels.find_first_of('\t')); + + newLine += "\tGroup" + labels.substr(labels.find_first_of('\t')); + + typesFiles[extension].push_back(outputNames[i]); + + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; + + //print headers + ofstream out; + m->openOutputFile(combineFileName, out); + out << newLine << endl; + out.close(); + + } + + //for each type create a combo file + map lineToNumber; + for (map >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) { + + ofstream out; + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first; + m->openOutputFileAppend(combineFileName, out); + newFileNames.push_back(combineFileName); + + vector thisTypesFiles = it->second; + + //open each type summary file + map > files; //maps file name to lines in file + int maxLines = 0; + int numColumns = 0; + for (int i=0; iopenInputFile(thisTypesFiles[i], temp); + + //read through first line - labels + m->getline(temp); m->gobble(temp); + + vector thisFilesLines; + string fileNameRoot = m->getRootName(thisTypesFiles[i]); + map::iterator itName = nameMap.find(fileNameRoot); + string group = ""; + if (itName != nameMap.end()) { + group = itName->second; + }else { + group = "not found" + i; + m->mothurOut("[ERROR]: can't parse filename."); m->mothurOutEndLine(); + } + + thisFilesLines.push_back(group); + int count = 1; + while (!temp.eof()){ + + string thisLine = m->getline(temp); + + string numSampled = thisLine.substr(0, thisLine.find_first_of('\t')); + int num = 0; + convert(numSampled, num); + numColumns = m->getNumChar(thisLine, '\t'); + lineToNumber[count] = num; + count++; + + thisFilesLines.push_back(thisLine); + + m->gobble(temp); + } + + files[thisTypesFiles[i]] = thisFilesLines; + + //save longest file for below + if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); } + + temp.close(); + m->mothurRemove(thisTypesFiles[i]); + } + + + //for each label + for (int k = 1; k < maxLines; k++) { + + //grab data for each group + for (int i=0; i::iterator itLine = lineToNumber.find(k); + if (itLine != lineToNumber.end()) { + string output = toString(itLine->second); + if (k < files[thisTypesFiles[i]].size()) { + string line = files[thisTypesFiles[i]][k]; + output = line.substr(0, line.find_first_of('\t')); + output += '\t' + files[thisTypesFiles[i]][0] + '\t' + line.substr(line.find_first_of('\t')); + }else{ + output += '\t' + files[thisTypesFiles[i]][0] + '\t'; + for (int h = 0; h < numColumns; h++) { + output += "NA\t"; + } + } + out << output << endl; + }else { m->mothurOut("[ERROR]: parsing results, cant find " + toString(k)); m->mothurOutEndLine(); } + } + } + + out.close(); + + } + + //return combine file name + return newFileNames; + + } + catch(exception& e) { + m->errorOut(e, "RareFactCommand", "createGroupFile"); + exit(1); + } +} +//********************************************************************************************************************** vector RareFactCommand::parseSharedFile(string filename) { try { vector filenames; @@ -464,7 +611,7 @@ vector RareFactCommand::parseSharedFile(string filename) { //clears file before we start to write to it below for (int i=0; igetGroup() + ".rabund").c_str()); + m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund")); filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund")); } @@ -494,7 +641,7 @@ vector RareFactCommand::parseSharedFile(string filename) { } delete input; - m->Groups.clear(); + m->clearGroups(); return filenames; }