X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rarefactcommand.cpp;h=82ff7faef0d8886d21fc6b642f34c88c1b143ea1;hb=a6cf29fa4dac0909c7582cb1094151d34093ee76;hp=b9007f4c430de320ba2a63d32c98cd30a08fcb83;hpb=7bf9a81bba76538ecaf351ae208de3da4bf1b6dd;p=mothur.git diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index b9007f4..82ff7fa 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -38,6 +38,7 @@ vector RareFactCommand::setParameters(){ CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "",true,false); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); + CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -63,6 +64,7 @@ string RareFactCommand::getHelpString(){ helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n"; helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n"; validCalculator.printCalc("rarefaction"); + helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n"; return helpString; @@ -251,17 +253,20 @@ RareFactCommand::RareFactCommand(string option) { string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } - convert(temp, freq); + m->mothurConvert(temp, freq); temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } - convert(temp, abund); + m->mothurConvert(temp, abund); temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } - convert(temp, nIters); + m->mothurConvert(temp, nIters); temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); + + temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; } + groupMode = m->isTrue(temp); } } @@ -282,11 +287,12 @@ int RareFactCommand::execute(){ if (m->control_pressed) { return 0; } + map file2Group; //index in outputNames[i] -> group for (int p = 0; p < inputFileNames.size(); p++) { string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])); - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } if (inputFileNames.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -345,6 +351,7 @@ int RareFactCommand::execute(){ rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs"))); outputNames.push_back(fileNameRoot+"r_nseqs"); outputTypes["r_nseqs"].push_back(fileNameRoot+"r_nseqs"); } + if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; } } } @@ -360,12 +367,12 @@ int RareFactCommand::execute(){ set processedLabels; set userLabels = labels; - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } //as long as you are not at the end of the file or done wih the lines you want while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(order->getLabel()) == 1){ @@ -403,7 +410,7 @@ int RareFactCommand::execute(){ order = (input->getOrderVector()); } - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set::iterator it; @@ -418,7 +425,7 @@ int RareFactCommand::execute(){ } } - if (m->control_pressed) { for(int i=0;icontrol_pressed) { for(int i=0;imothurRemove(outputNames[i]); } return 0; } //run last label if you need to if (needToRun == true) { @@ -440,7 +447,12 @@ int RareFactCommand::execute(){ } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //create summary file containing all the groups data for each label - this function just combines the info from the files already created. + if ((sharedfile != "") && (groupMode)) { outputNames = createGroupFile(outputNames, file2Group); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -455,6 +467,136 @@ int RareFactCommand::execute(){ } } //********************************************************************************************************************** +vector RareFactCommand::createGroupFile(vector& outputNames, map file2Group) { + try { + + vector newFileNames; + + //find different types of files + map > typesFiles; + for (int i = 0; i < outputNames.size(); i++) { + string extension = m->getExtension(outputNames[i]); + + ifstream in; + m->openInputFile(outputNames[i], in); + + string labels = m->getline(in); + string newLine = labels.substr(0, labels.find_first_of('\t')); + + newLine += "\tGroup" + labels.substr(labels.find_first_of('\t')); + + map >::iterator itfind = typesFiles.find(extension); + if (itfind != typesFiles.end()) { + (itfind->second)[outputNames[i]] = file2Group[i]; + }else { + map temp; + temp[outputNames[i]] = file2Group[i]; + typesFiles[extension] = temp; + } + + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; + + //print headers + ofstream out; + m->openOutputFile(combineFileName, out); + out << newLine << endl; + out.close(); + + } + + //for each type create a combo file + map lineToNumber; + for (map >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) { + + ofstream out; + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first; + m->openOutputFileAppend(combineFileName, out); + newFileNames.push_back(combineFileName); + map thisTypesFiles = it->second; + + //open each type summary file + map > files; //maps file name to lines in file + int maxLines = 0; + int numColumns = 0; + for (map::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) { + + string thisfilename = itFileNameGroup->first; + string group = itFileNameGroup->second; + + ifstream temp; + m->openInputFile(thisfilename, temp); + + //read through first line - labels + m->getline(temp); m->gobble(temp); + + vector thisFilesLines; + + thisFilesLines.push_back(group); + int count = 1; + while (!temp.eof()){ + + string thisLine = m->getline(temp); + + string numSampled = thisLine.substr(0, thisLine.find_first_of('\t')); + int num = 0; + convert(numSampled, num); + numColumns = m->getNumChar(thisLine, '\t'); + lineToNumber[count] = num; + count++; + + thisFilesLines.push_back(thisLine); + m->gobble(temp); + } + + files[thisfilename] = thisFilesLines; + + //save longest file for below + if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); } + + temp.close(); + m->mothurRemove(thisfilename); + } + + + //for each label + for (int k = 1; k < maxLines; k++) { + + //grab data for each group + for (map::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) { + + string thisfilename = itFileNameGroup->first; + map::iterator itLine = lineToNumber.find(k); + if (itLine != lineToNumber.end()) { + string output = toString(itLine->second); + if (k < files[thisfilename].size()) { + string line = files[thisfilename][k]; + output = line.substr(0, line.find_first_of('\t')); + output += '\t' + files[thisfilename][0] + '\t' + line.substr(line.find_first_of('\t')); + }else{ + output += '\t' + files[thisfilename][0] + '\t'; + for (int h = 0; h < numColumns; h++) { + output += "NA\t"; + } + } + out << output << endl; + }else { m->mothurOut("[ERROR]: parsing results, cant find " + toString(k)); m->mothurOutEndLine(); } + } + } + + out.close(); + + } + + //return combine file name + return newFileNames; + + } + catch(exception& e) { + m->errorOut(e, "RareFactCommand", "createGroupFile"); + exit(1); + } +} +//********************************************************************************************************************** vector RareFactCommand::parseSharedFile(string filename) { try { vector filenames; @@ -469,7 +611,7 @@ vector RareFactCommand::parseSharedFile(string filename) { //clears file before we start to write to it below for (int i=0; igetGroup() + ".rabund").c_str()); + m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund")); filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund")); } @@ -499,7 +641,7 @@ vector RareFactCommand::parseSharedFile(string filename) { } delete input; - m->Groups.clear(); + m->clearGroups(); return filenames; }