X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sensspeccommand.cpp;h=cfa1f5b6dc0649925ff4762abdef070e93fd7688;hb=c7e8c2d15bd7cedcfdf18675cb0ea1a0dcd0e3c0;hp=726c9045b20dfd7ac907b5e73f0692e3d0835c69;hpb=7bf9a81bba76538ecaf351ae208de3da4bf1b6dd;p=mothur.git diff --git a/sensspeccommand.cpp b/sensspeccommand.cpp index 726c904..cfa1f5b 100644 --- a/sensspeccommand.cpp +++ b/sensspeccommand.cpp @@ -19,7 +19,7 @@ vector SensSpecCommand::setParameters(){ CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); - CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard); + CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -45,6 +45,26 @@ string SensSpecCommand::getHelpString(){ } } //********************************************************************************************************************** +string SensSpecCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "sensspec") { outputFileName = "sensspec"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** SensSpecCommand::SensSpecCommand(){ try { abort = true; calledHelp = true; @@ -62,7 +82,8 @@ SensSpecCommand::SensSpecCommand(){ SensSpecCommand::SensSpecCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; + allLines = 1; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } @@ -182,16 +203,21 @@ SensSpecCommand::SensSpecCommand(string option) { // cout << "name:\t" << nameFile << endl; temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); // cout << cutoff << endl; temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } - convert(temp, precision); + m->mothurConvert(temp, precision); // cout << precision << endl; - lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; } + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } - sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + ".sensspec"; + sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + getOutputFileNameTag("sensspec"); } } catch(exception& e) { @@ -210,6 +236,8 @@ int SensSpecCommand::execute(){ if(format == "phylip") { processPhylip(); } else if(format == "column") { processColumn(); } + if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; } + m->mothurOutEndLine(); m->mothurOut("Output File Name: "); m->mothurOutEndLine(); m->mothurOut(sensSpecFileName); m->mothurOutEndLine(); @@ -226,216 +254,383 @@ int SensSpecCommand::execute(){ //*************************************************************************************************************** -void SensSpecCommand::processPhylip(){ +int SensSpecCommand::processPhylip(){ try{ //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file - - ifstream inputListFile; - m->openInputFile(listFile, inputListFile); - string origCutoff = ""; bool getCutoff = 0; if(cutoff == -1.00) { getCutoff = 1; } - else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } + else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } - string label; - int numOTUs; - map seqMap; string seqList; - while(inputListFile){ - inputListFile >> label >> numOTUs; - for(int i=0;i> seqList; - int seqListLength = seqList.length(); - string seqName = ""; - for(int j=0;jgetLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if(m->control_pressed){ + for (int i = 0; i < outputNames.size(); i++){ m->mothurRemove(outputNames[i]); } delete list; return 0; + } + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + } + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } + + lastLabel = list->getLabel(); + + delete list; + list = input.getListVector(); + } + + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } - m->gobble(inputListFile); + } - int lNumSeqs = seqMap.size(); - int pNumSeqs = 0; + //run last label if you need to + if (needToRun == true) { + if (list != NULL) { delete list; } + list = input.getListVector(lastLabel); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + + delete list; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "processPhylip"); + exit(1); + } +} - ifstream phylipFile; - m->openInputFile(distFile, phylipFile); - phylipFile >> pNumSeqs; - if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; } - - string seqName; - double distance; - vector otuIndices(lNumSeqs, -1); +//*************************************************************************************************************** + +int SensSpecCommand::fillSeqMap(map& seqMap, ListVector*& list){ + try { + //for each otu + for(int i=0;igetNumBins();i++){ + + if (m->control_pressed) { return 0; } + + string seqList = list->get(i); + int seqListLength = seqList.length(); + string seqName = ""; + + //parse bin by name, mapping each name to its otu number + for(int j=0;jerrorOut(e, "SensSpecCommand", "fillSeqMap"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::fillSeqPairSet(set& seqPairSet, ListVector*& list){ + try { + int numSeqs = 0; + + //for each otu + for(int i=0;igetNumBins();i++){ + + if (m->control_pressed) { return 0; } + + vector seqNameVector; + string bin = list->get(i); + m->splitAtComma(bin, seqNameVector); + + numSeqs += seqNameVector.size(); + + for(int j=0;jerrorOut(e, "SensSpecCommand", "fillSeqPairSet"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::process(map& seqMap, string label, bool& getCutoff, string& origCutoff){ + try { + + int lNumSeqs = seqMap.size(); + int pNumSeqs = 0; + + ifstream phylipFile; + m->openInputFile(distFile, phylipFile); + phylipFile >> pNumSeqs; + if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ } + + string seqName; + double distance; + vector otuIndices(lNumSeqs, -1); + + truePositives = 0; + falsePositives = 0; + trueNegatives = 0; + falseNegatives = 0; + + if(getCutoff == 1){ + if(label != "unique"){ + origCutoff = label; + convert(label, cutoff); + if(hard == 0){ cutoff += (0.49 / double(precision)); } + } + else{ + origCutoff = "unique"; + cutoff = 0.0000; + } + } + + m->mothurOut(label); m->mothurOutEndLine(); + + for(int i=0;icontrol_pressed) { return 0; } + + phylipFile >> seqName; + otuIndices[i] = seqMap[seqName]; - for(int i=0;i> seqName; - otuIndices[i] = seqMap[seqName]; + for(int j=0;j> distance; - for(int j=0;j> distance; - - if(distance <= cutoff){ - if(otuIndices[i] == otuIndices[j]) { truePositives++; } - else { falseNegatives++; } - } - else{ - if(otuIndices[i] == otuIndices[j]) { falsePositives++; } - else { trueNegatives++; } - } + if(distance <= cutoff){ + if(otuIndices[i] == otuIndices[j]) { truePositives++; } + else { falseNegatives++; } + } + else{ + if(otuIndices[i] == otuIndices[j]) { falsePositives++; } + else { trueNegatives++; } + } + } + } + phylipFile.close(); + + outputStatistics(label, origCutoff); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "process"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::process(set& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){ + try { + int numDists = (numSeqs * (numSeqs-1) / 2); + + ifstream columnFile; + m->openInputFile(distFile, columnFile); + string seqNameA, seqNameB, seqPairString; + double distance; + + truePositives = 0; + falsePositives = 0; + trueNegatives = numDists; + falseNegatives = 0; + + if(getCutoff == 1){ + if(label != "unique"){ + origCutoff = label; + convert(label, cutoff); + if(hard == 0){ cutoff += (0.49 / double(precision)); } + } + else{ + origCutoff = "unique"; + cutoff = 0.0000; + } + } + + m->mothurOut(label); m->mothurOutEndLine(); + + while(columnFile){ + columnFile >> seqNameA >> seqNameB >> distance; + if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; } + else { seqPairString = seqNameB + '\t' + seqNameA; } + + set::iterator it = seqPairSet.find(seqPairString); + + if(distance <= cutoff){ + if(it != seqPairSet.end()){ + truePositives++; + seqPairSet.erase(it); + } + else{ + falseNegatives++; } + trueNegatives--; + } + else if(it != seqPairSet.end()){ + falsePositives++; + trueNegatives--; + seqPairSet.erase(it); } - phylipFile.close(); - outputStatistics(label, origCutoff); + m->gobble(columnFile); } - inputListFile.close(); - + falsePositives += seqPairSet.size(); + + outputStatistics(label, origCutoff); + + + return 0; } catch(exception& e) { - m->errorOut(e, "SensSpecCommand", "processPhylip"); + m->errorOut(e, "SensSpecCommand", "process"); exit(1); } } - //*************************************************************************************************************** -void SensSpecCommand::processColumn(){ +int SensSpecCommand::processColumn(){ try{ - ifstream inputListFile; - m->openInputFile(listFile, inputListFile); - string origCutoff = ""; bool getCutoff = 0; if(cutoff == -1.00) { getCutoff = 1; } else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } set seqPairSet; + int numSeqs = 0; + + InputData input(listFile, "list"); + ListVector* list = input.getListVector(); + string lastLabel = list->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; - string label, seqList; - int numOTUs; - int numSeqs; - while(inputListFile){ - numSeqs = 0; + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - inputListFile >> label >> numOTUs; - for(int i=0;i seqNameVector; + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; } + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ - inputListFile >> seqList; - int seqListLength = seqList.length(); - string seqName = ""; - for(int j=0;jgetLabel()); + userLabels.erase(list->getLabel()); - int numSeqsInOTU = seqNameVector.size(); - for(int j=0;jgobble(inputListFile); - - int numDists = (numSeqs * (numSeqs-1) / 2); - - ifstream columnFile; - m->openInputFile(distFile, columnFile); - string seqNameA, seqNameB, seqPairString; - double distance; - - truePositives = 0; - falsePositives = 0; - trueNegatives = numDists; - falseNegatives = 0; - - if(getCutoff == 1){ - if(label != "unique"){ - origCutoff = label; - convert(label, cutoff); - if(hard == 0){ cutoff += (0.49 / double(precision)); } - } - else{ - origCutoff = "unique"; - cutoff = 0.0000; - } + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); } - cout << label << endl; + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } - while(columnFile){ - columnFile >> seqNameA >> seqNameB >> distance; - if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; } - else { seqPairString = seqNameB + '\t' + seqNameA; } - - set::iterator it = seqPairSet.find(seqPairString); + lastLabel = list->getLabel(); - if(distance <= cutoff){ - if(it != seqPairSet.end()){ - truePositives++; - seqPairSet.erase(it); - } - else{ - falseNegatives++; - } - trueNegatives--; - } - else if(it != seqPairSet.end()){ - falsePositives++; - trueNegatives--; - seqPairSet.erase(it); - } - - m->gobble(columnFile); + delete list; + list = input.getListVector(); + } + + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } - falsePositives += seqPairSet.size(); + } + + //run last label if you need to + if (needToRun == true) { + if (list != NULL) { delete list; } + list = input.getListVector(lastLabel); - outputStatistics(label, origCutoff); + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + delete list; + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); } + + return 0; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "processColumn");