X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=getoturepcommand.cpp;h=1e09dba67dc7f1d9b0b10c5a5dfa79023b04a3a6;hb=8bc3e5b38c2317a1715f53be22fa96455868c281;hp=9dfc8bdeadc83c9784a42c10d66b26efcd52e8d4;hpb=a8367302932de9be5434e77f6e5829d7609e2aec;p=mothur.git diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index 9dfc8bd..1e09dba 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -36,6 +36,54 @@ inline bool compareGroup(repStruct left, repStruct right){ return (left.group < right.group); } //********************************************************************************************************************** +GetOTURepCommand::GetOTURepCommand(){ + try { + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["name"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector GetOTURepCommand::getValidParameters(){ + try { + string Array[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "GetOTURepCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector GetOTURepCommand::getRequiredParameters(){ + try { + string Array[] = {"fasta","list"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "GetOTURepCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector GetOTURepCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "GetOTURepCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** GetOTURepCommand::GetOTURepCommand(string option) { try{ globaldata = GlobalData::getInstance(); @@ -62,6 +110,11 @@ GetOTURepCommand::GetOTURepCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["name"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -70,7 +123,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { it = parameters.find("list"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["list"] = inputDir + it->second; } } @@ -78,7 +131,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { it = parameters.find("fasta"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["fasta"] = inputDir + it->second; } } @@ -86,7 +139,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { it = parameters.find("phylip"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["phylip"] = inputDir + it->second; } } @@ -94,7 +147,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { it = parameters.find("column"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["column"] = inputDir + it->second; } } @@ -102,7 +155,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { it = parameters.find("name"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } @@ -110,7 +163,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { it = parameters.find("group"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["group"] = inputDir + it->second; } } @@ -153,7 +206,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { label = validParameter.validFile(parameters, "label", false); if (label == "not found") { label = ""; allLines = 1; } else { - if(label != "all") { splitAtDash(label, labels); allLines = 0; } + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } @@ -179,13 +232,13 @@ GetOTURepCommand::GetOTURepCommand(string option) { m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine(); abort = true; }else { - splitAtDash(groups, Groups); + m->splitAtDash(groups, Groups); } } globaldata->Groups = Groups; string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; } - large = isTrue(temp); + large = m->isTrue(temp); temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } convert(temp, precision); @@ -214,6 +267,7 @@ void GetOTURepCommand::help(){ m->mothurOut("The default value for label is all labels in your inputfile.\n"); m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n"); m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n"); + m->mothurOut("The group parameter allows you provide a group file.\n"); m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n"); m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n"); m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n"); @@ -306,7 +360,7 @@ int GetOTURepCommand::execute(){ delete nameMap; //openfile for getMap to use - openInputFile(distFile, inRow); + m->openInputFile(distFile, inRow); if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; } } @@ -321,7 +375,7 @@ int GetOTURepCommand::execute(){ names.clear(); binnames = globaldata->gListVector->get(i); - splitAtComma(binnames, names); + m->splitAtComma(binnames, names); for (int j = 0; j < names.size(); j++) { nameToIndex[names[j]] = i; @@ -377,7 +431,7 @@ int GetOTURepCommand::execute(){ if (m->control_pressed) { if (large) { inRow.close(); remove(distFile.c_str()); } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; } @@ -385,7 +439,7 @@ int GetOTURepCommand::execute(){ userLabels.erase(list->getLabel()); } - if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; @@ -396,7 +450,7 @@ int GetOTURepCommand::execute(){ if (m->control_pressed) { if (large) { inRow.close(); remove(distFile.c_str()); } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; } @@ -436,7 +490,7 @@ int GetOTURepCommand::execute(){ if (m->control_pressed) { if (large) { inRow.close(); remove(distFile.c_str()); } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; } } @@ -488,7 +542,7 @@ int GetOTURepCommand::execute(){ void GetOTURepCommand::readNamesFile() { try { vector dupNames; - openInputFile(namefile, inNames); + m->openInputFile(namefile, inNames); string name, names, sequence; @@ -499,7 +553,7 @@ void GetOTURepCommand::readNamesFile() { dupNames.clear(); //parse names into vector - splitAtComma(names, dupNames); + m->splitAtComma(names, dupNames); //store names in fasta map sequence = fasta->getSequence(name); @@ -507,7 +561,7 @@ void GetOTURepCommand::readNamesFile() { fasta->push_back(dupNames[i], sequence); } - gobble(inNames); + m->gobble(inNames); } inNames.close(); @@ -522,7 +576,7 @@ string GetOTURepCommand::findRep(vector names) { try{ // if only 1 sequence in bin or processing the "unique" label, then // the first sequence of the OTU is the representative one - if ((names.size() == 1) || (list->getLabel() == "unique")) { + if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) { return names[0]; }else{ vector seqIndex(names.size()); @@ -597,26 +651,26 @@ int GetOTURepCommand::process(ListVector* processList) { string nameRep; //create output file - if (outputDir == "") { outputDir += hasPath(listfile); } + if (outputDir == "") { outputDir += m->hasPath(listfile); } ofstream newNamesOutput; string outputNamesFile; map filehandles; if (Groups.size() == 0) { //you don't want to use groups - outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names"; - openOutputFile(outputNamesFile, newNamesOutput); - outputNames.push_back(outputNamesFile); + outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".rep.names"; + m->openOutputFile(outputNamesFile, newNamesOutput); + outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); outputNameFiles[outputNamesFile] = processList->getLabel(); }else{ //you want to use groups ofstream* temp; for (int i=0; igetLabel() + "." + Groups[i] + ".rep.names"; + outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names"; - openOutputFile(outputNamesFile, *(temp)); - outputNames.push_back(outputNamesFile); + m->openOutputFile(outputNamesFile, *(temp)); + outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i]; } } @@ -638,7 +692,7 @@ int GetOTURepCommand::process(ListVector* processList) { string temp = processList->get(i); vector namesInBin; - splitAtComma(temp, namesInBin); + m->splitAtComma(temp, namesInBin); if (Groups.size() == 0) { nameRep = findRep(namesInBin); @@ -654,7 +708,7 @@ int GetOTURepCommand::process(ListVector* processList) { if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } - if (inUsersGroups(thisgroup, Groups)) { //add this name to correct group + if (m->inUsersGroups(thisgroup, Groups)) { //add this name to correct group NamesInGroup[thisgroup].push_back(namesInBin[j]); } } @@ -700,27 +754,27 @@ int GetOTURepCommand::processNames(string filename, string label) { try{ //create output file - if (outputDir == "") { outputDir += hasPath(listfile); } - string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + label + ".rep.fasta"; - openOutputFile(outputFileName, out); + if (outputDir == "") { outputDir += m->hasPath(listfile); } + string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + ".rep.fasta"; + m->openOutputFile(outputFileName, out); vector reps; - outputNames.push_back(outputFileName); + outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); ofstream out2; string tempNameFile = filename + ".temp"; - openOutputFile(tempNameFile, out2); + m->openOutputFile(tempNameFile, out2); ifstream in; - openInputFile(filename, in); + m->openInputFile(filename, in); int i = 0; while (!in.eof()) { string rep, binnames; - in >> i >> rep >> binnames; gobble(in); + in >> i >> rep >> binnames; m->gobble(in); out2 << rep << '\t' << binnames << endl; vector names; - splitAtComma(binnames, names); + m->splitAtComma(binnames, names); int binsize = names.size(); //if you have a groupfile @@ -754,7 +808,7 @@ int GetOTURepCommand::processNames(string filename, string label) { if (sequence != "not found") { if (sorted == "") { //print them out - rep = rep + "|" + toString(i+1); + rep = rep + "\t" + toString(i+1); rep = rep + "|" + toString(binsize); if (groupfile != "") { rep = rep + "|" + group; @@ -780,7 +834,7 @@ int GetOTURepCommand::processNames(string filename, string label) { //print them for (int i = 0; i < reps.size(); i++) { string sequence = fasta->getSequence(reps[i].name); - string outputName = reps[i].name + "|" + toString(reps[i].bin); + string outputName = reps[i].name + "\t" + toString(reps[i].bin); outputName = outputName + "|" + toString(reps[i].size); if (groupfile != "") { outputName = outputName + "|" + reps[i].group; @@ -789,10 +843,12 @@ int GetOTURepCommand::processNames(string filename, string label) { out << sequence << endl; } } - + + in.close(); out.close(); out2.close(); + remove(filename.c_str()); rename(tempNameFile.c_str(), filename.c_str()); return 0;