X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=sensspeccommand.cpp;h=f61232a26c133fb9d0cebfa9cdcca31fdfe2f0dc;hp=e647bf2430c629af70613b79abcaafc78ab325fa;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=e0fbf58358a72f20352cf2a43922ab6b5bdf0cf8 diff --git a/sensspeccommand.cpp b/sensspeccommand.cpp index e647bf2..f61232a 100644 --- a/sensspeccommand.cpp +++ b/sensspeccommand.cpp @@ -9,23 +9,84 @@ #include "sensspeccommand.h" +//********************************************************************************************************************** +vector SensSpecCommand::setParameters(){ + try { + CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist); + CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip); + CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff); + CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); + CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string SensSpecCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The sens.spec command....\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string SensSpecCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "sensspec") { pattern = "[filename],sensspec"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** +SensSpecCommand::SensSpecCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["sensspec"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "SensSpecCommand"); + exit(1); + } +} //*************************************************************************************************************** SensSpecCommand::SensSpecCommand(string option) { try { - abort = false; + abort = false; calledHelp = false; + allLines = 1; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { string temp; - - //valid paramters for this command - string AlignArray[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"}; - vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); @@ -38,6 +99,10 @@ SensSpecCommand::SensSpecCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["sensspec"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -46,7 +111,7 @@ SensSpecCommand::SensSpecCommand(string option) { it = parameters.find("list"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["list"] = inputDir + it->second; } } @@ -54,7 +119,7 @@ SensSpecCommand::SensSpecCommand(string option) { it = parameters.find("phylip"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["phylip"] = inputDir + it->second; } } @@ -62,65 +127,78 @@ SensSpecCommand::SensSpecCommand(string option) { it = parameters.find("column"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["column"] = inputDir + it->second; } - } - - it = parameters.find("name"); - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["name"] = inputDir + it->second; } - } - + } } //check for required parameters listFile = validParameter.validFile(parameters, "list", true); - if (listFile == "not found") { m->mothurOut("list is a required parameter for the sens.spec command."); m->mothurOutEndLine(); abort = true; } + if (listFile == "not found") { + listFile = m->getListFile(); + if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; } + } else if (listFile == "not open") { abort = true; } + else { m->setListFile(listFile); } + + phylipfile = validParameter.validFile(parameters, "phylip", true); + if (phylipfile == "not found") { phylipfile = ""; } + else if (phylipfile == "not open") { abort = true; } + else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); } + + columnfile = validParameter.validFile(parameters, "column", true); + if (columnfile == "not found") { columnfile = ""; } + else if (columnfile == "not open") { abort = true; } + else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); } + + if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? + //give priority to column, then phylip + columnfile = m->getColumnFile(); + if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); } + else { + phylipfile = m->getPhylipFile(); + if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); + abort = true; + } + } + }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; } + - distFile = validParameter.validFile(parameters, "column", true); - format = "column"; - if(distFile == "not found") { - distFile = validParameter.validFile(parameters, "phylip", true); - format = "phylip"; - } - if(distFile == "not found") { m->mothurOut("either column or phylip are required for the sens.spec command."); m->mothurOutEndLine(); abort = true; } - else if (distFile == "not open") { abort = true; } - //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; - outputDir += hasPath(listFile); //if user entered a file with a path then preserve it + outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it } //check for optional parameter and set defaults // ...at some point should added some additional type checking... temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found"){ hard = 0; } - else if(!isTrue(temp)) { hard = 0; } - else if(isTrue(temp)) { hard = 1; } - -// temp = validParameter.validFile(parameters, "name", true); -// if (temp == "not found") { nameFile = ""; } -// else if(temp == "not open") { abort = true; } -// else { nameFile = temp; } -// cout << "name:\t" << nameFile << endl; - + else if(!m->isTrue(temp)) { hard = 0; } + else if(m->isTrue(temp)) { hard = 1; } + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); // cout << cutoff << endl; temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } - convert(temp, precision); + m->mothurConvert(temp, precision); // cout << precision << endl; - lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; } + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } - sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec"; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile)); + sensSpecFileName = getOutputFileName("sensspec",variables); } } catch(exception& e) { @@ -128,40 +206,35 @@ SensSpecCommand::SensSpecCommand(string option) { exit(1); } } - -//********************************************************************************************************************** - -void SensSpecCommand::help(){ - try { - m->mothurOut("The sens.spec command reads a fastaFile and creates .....\n"); - - - - m->mothurOut("Example sens.spec(...).\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"); - m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n"); - - } - catch(exception& e) { - m->errorOut(e, "SensSpecCommand", "help"); - exit(1); - } -} - -//*************************************************************************************************************** - -SensSpecCommand::~SensSpecCommand(){ /* do nothing */ } - //*************************************************************************************************************** int SensSpecCommand::execute(){ try{ - if (abort == true) { return 0; } - + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + int startTime = time(NULL); + + //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file. + string newListFile = preProcessList(); + if (newListFile != "") { listFile = newListFile; } + setUpOutput(); + outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName); if(format == "phylip") { processPhylip(); } else if(format == "column") { processColumn(); } + //remove temp file if created + if (newListFile != "") { m->mothurRemove(newListFile); } + + if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; } + + m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine(); + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + m->mothurOut(sensSpecFileName); m->mothurOutEndLine(); + m->mothurOutEndLine(); + return 0; } @@ -173,216 +246,383 @@ int SensSpecCommand::execute(){ //*************************************************************************************************************** -void SensSpecCommand::processPhylip(){ +int SensSpecCommand::processPhylip(){ try{ //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file - - ifstream inputListFile; - openInputFile(listFile, inputListFile); - string origCutoff = ""; bool getCutoff = 0; if(cutoff == -1.00) { getCutoff = 1; } - else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } + else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } - string label; - int numOTUs; - map seqMap; string seqList; - while(inputListFile){ - inputListFile >> label >> numOTUs; - for(int i=0;i> seqList; - int seqListLength = seqList.length(); - string seqName = ""; - for(int j=0;jgetLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if(m->control_pressed){ + for (int i = 0; i < outputNames.size(); i++){ m->mothurRemove(outputNames[i]); } delete list; return 0; + } + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + } + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } + + lastLabel = list->getLabel(); + + delete list; + list = input.getListVector(); + } + + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } - gobble(inputListFile); + } + + //run last label if you need to + if (needToRun == true) { + if (list != NULL) { delete list; } + list = input.getListVector(lastLabel); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + + delete list; + } - int lNumSeqs = seqMap.size(); - int pNumSeqs = 0; + return 0; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "processPhylip"); + exit(1); + } +} + +//*************************************************************************************************************** - ifstream phylipFile; - openInputFile(distFile, phylipFile); - phylipFile >> pNumSeqs; - if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; } - - string seqName; - double distance; - vector otuIndices(lNumSeqs, -1); +int SensSpecCommand::fillSeqMap(map& seqMap, ListVector*& list){ + try { + //for each otu + for(int i=0;igetNumBins();i++){ + + if (m->control_pressed) { return 0; } + + string seqList = list->get(i); + int seqListLength = seqList.length(); + string seqName = ""; + + //parse bin by name, mapping each name to its otu number + for(int j=0;jerrorOut(e, "SensSpecCommand", "fillSeqMap"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::fillSeqPairSet(set& seqPairSet, ListVector*& list){ + try { + int numSeqs = 0; + + //for each otu + for(int i=0;igetNumBins();i++){ + + if (m->control_pressed) { return 0; } + + vector seqNameVector; + string bin = list->get(i); + m->splitAtComma(bin, seqNameVector); + + numSeqs += seqNameVector.size(); - for(int i=0;i> seqName; - otuIndices[i] = seqMap[seqName]; + for(int j=0;jerrorOut(e, "SensSpecCommand", "fillSeqPairSet"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::process(map& seqMap, string label, bool& getCutoff, string& origCutoff){ + try { + + int lNumSeqs = seqMap.size(); + int pNumSeqs = 0; + + ifstream phylipFile; + m->openInputFile(distFile, phylipFile); + phylipFile >> pNumSeqs; + if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ } + + string seqName; + double distance; + vector otuIndices(lNumSeqs, -1); + + truePositives = 0; + falsePositives = 0; + trueNegatives = 0; + falseNegatives = 0; + + if(getCutoff == 1){ + if(label != "unique"){ + origCutoff = label; + convert(label, cutoff); + if(hard == 0){ cutoff += (0.49 / double(precision)); } + } + else{ + origCutoff = "unique"; + cutoff = 0.0000; + } + } + + m->mothurOut(label); m->mothurOutEndLine(); + + for(int i=0;icontrol_pressed) { return 0; } + + phylipFile >> seqName; + otuIndices[i] = seqMap[seqName]; + + for(int j=0;j> distance; - for(int j=0;j> distance; - - if(distance <= cutoff){ - if(otuIndices[i] == otuIndices[j]) { truePositives++; } - else { falseNegatives++; } - } - else{ - if(otuIndices[i] == otuIndices[j]) { falsePositives++; } - else { trueNegatives++; } - } + if(distance <= cutoff){ + if(otuIndices[i] == otuIndices[j]) { truePositives++; } + else { falseNegatives++; } + } + else{ + if(otuIndices[i] == otuIndices[j]) { falsePositives++; } + else { trueNegatives++; } } } - phylipFile.close(); + } + phylipFile.close(); + + outputStatistics(label, origCutoff); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "process"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::process(set& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){ + try { + int numDists = (numSeqs * (numSeqs-1) / 2); + + ifstream columnFile; + m->openInputFile(distFile, columnFile); + string seqNameA, seqNameB, seqPairString; + double distance; + + truePositives = 0; + falsePositives = 0; + trueNegatives = numDists; + falseNegatives = 0; + + if(getCutoff == 1){ + if(label != "unique"){ + origCutoff = label; + convert(label, cutoff); + if(hard == 0){ cutoff += (0.49 / double(precision)); } + } + else{ + origCutoff = "unique"; + cutoff = 0.0000; + } + } + + m->mothurOut(label); m->mothurOutEndLine(); + + while(columnFile){ + columnFile >> seqNameA >> seqNameB >> distance; + if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; } + else { seqPairString = seqNameB + '\t' + seqNameA; } + + set::iterator it = seqPairSet.find(seqPairString); + + if(distance <= cutoff){ + if(it != seqPairSet.end()){ + truePositives++; + seqPairSet.erase(it); + } + else{ + falseNegatives++; + } + trueNegatives--; + } + else if(it != seqPairSet.end()){ + falsePositives++; + trueNegatives--; + seqPairSet.erase(it); + } - outputStatistics(label, origCutoff); + m->gobble(columnFile); } - inputListFile.close(); - + falsePositives += seqPairSet.size(); + + outputStatistics(label, origCutoff); + + + return 0; } catch(exception& e) { - m->errorOut(e, "SensSpecCommand", "processPhylip"); + m->errorOut(e, "SensSpecCommand", "process"); exit(1); } } - //*************************************************************************************************************** -void SensSpecCommand::processColumn(){ +int SensSpecCommand::processColumn(){ try{ - ifstream inputListFile; - openInputFile(listFile, inputListFile); - string origCutoff = ""; bool getCutoff = 0; if(cutoff == -1.00) { getCutoff = 1; } else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } set seqPairSet; + int numSeqs = 0; + + InputData input(listFile, "list"); + ListVector* list = input.getListVector(); + string lastLabel = list->getLabel(); - string label, seqList; - int numOTUs; - int numSeqs; + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; - while(inputListFile){ - numSeqs = 0; + + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - inputListFile >> label >> numOTUs; - for(int i=0;i seqNameVector; + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; } + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ - inputListFile >> seqList; - int seqListLength = seqList.length(); - string seqName = ""; - for(int j=0;jgetLabel()); + userLabels.erase(list->getLabel()); - int numSeqsInOTU = seqNameVector.size(); - for(int j=0;jgetLabel(), getCutoff, origCutoff, numSeqs); } - cout << label << endl; + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } - while(columnFile){ - columnFile >> seqNameA >> seqNameB >> distance; - if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; } - else { seqPairString = seqNameB + '\t' + seqNameA; } - - set::iterator it = seqPairSet.find(seqPairString); + lastLabel = list->getLabel(); - if(distance <= cutoff){ - if(it != seqPairSet.end()){ - truePositives++; - seqPairSet.erase(it); - } - else{ - falseNegatives++; - } - trueNegatives--; - } - else if(it != seqPairSet.end()){ - falsePositives++; - trueNegatives--; - seqPairSet.erase(it); - } - - gobble(columnFile); + delete list; + list = input.getListVector(); + } + + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } - falsePositives += seqPairSet.size(); + } + + //run last label if you need to + if (needToRun == true) { + if (list != NULL) { delete list; } + list = input.getListVector(lastLabel); - outputStatistics(label, origCutoff); + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + delete list; + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); } + + return 0; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "processColumn"); @@ -395,7 +635,7 @@ void SensSpecCommand::processColumn(){ void SensSpecCommand::setUpOutput(){ try{ ofstream sensSpecFile; - openOutputFile(sensSpecFileName, sensSpecFile); + m->openOutputFile(sensSpecFileName, sensSpecFile); sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; @@ -440,7 +680,7 @@ void SensSpecCommand::outputStatistics(string label, string cutoff){ if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; } ofstream sensSpecFile; - openOutputFileAppend(sensSpecFileName, sensSpecFile); + m->openOutputFileAppend(sensSpecFileName, sensSpecFile); sensSpecFile << label << '\t' << cutoff << '\t'; sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t'; @@ -455,6 +695,122 @@ void SensSpecCommand::outputStatistics(string label, string cutoff){ exit(1); } } +//*************************************************************************************************************** + +string SensSpecCommand::preProcessList(){ + try { + set uniqueNames; + //get unique names from distance file + if (format == "phylip") { + + ifstream phylipFile; + m->openInputFile(distFile, phylipFile); + string numTest; + int pNumSeqs; + phylipFile >> numTest; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { + m->mothurConvert(numTest, pNumSeqs); + } + phylipFile >> pNumSeqs; m->gobble(phylipFile); + + string seqName; + double distance; + + for(int i=0;icontrol_pressed) { return ""; } + + phylipFile >> seqName; + uniqueNames.insert(seqName); + + for(int j=0;j> distance; + } + m->gobble(phylipFile); + } + phylipFile.close(); + }else { + ifstream columnFile; + m->openInputFile(distFile, columnFile); + string seqNameA, seqNameB; + double distance; + + while(columnFile){ + if (m->control_pressed) { return ""; } + columnFile >> seqNameA >> seqNameB >> distance; + uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB); + m->gobble(columnFile); + } + columnFile.close(); + } + + //read list file, if numSeqs > unique names then remove redundant names + string newListFile = listFile + ".temp"; + ofstream out; + m->openOutputFile(newListFile, out); + ifstream in; + m->openInputFile(listFile, in); + + bool wroteSomething = false; + + while(!in.eof()){ + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; } + + //read in list vector + ListVector list(in); + + //listfile is already unique + if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; } + + //make a new list vector + ListVector newList; + newList.setLabel(list.getLabel()); + + //for each bin + for (int i = 0; i < list.getNumBins(); i++) { + + //parse out names that are in accnos file + string binnames = list.get(i); + vector bnames; + m->splitAtComma(binnames, bnames); + + string newNames = ""; + for (int i = 0; i < bnames.size(); i++) { + string name = bnames[i]; + //if that name is in the .accnos file, add it + if (uniqueNames.count(name) != 0) { newNames += name + ","; } + } + + //if there are names in this bin add to new list + if (newNames != "") { + newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma + newList.push_back(newNames); + } + } + + //print new listvector + if (newList.getNumBins() != 0) { + wroteSomething = true; + newList.print(out); + } + + m->gobble(in); + } + in.close(); + out.close(); + + if (wroteSomething) { return newListFile; } + return ""; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "preProcessList"); + exit(1); + } +} + //***************************************************************************************************************