X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=sensspeccommand.cpp;h=12786ca41c848d967ffda72e2d06f382881d8eed;hp=213a2ca99da988257a46ba8e91cb5e4123b5bb0e;hb=d1c97b8c04bb75faca1e76ffad60b37a4d789d3d;hpb=e150b0b0664caec517485ee6d69dcdade6dcae77 diff --git a/sensspeccommand.cpp b/sensspeccommand.cpp index 213a2ca..12786ca 100644 --- a/sensspeccommand.cpp +++ b/sensspeccommand.cpp @@ -12,16 +12,15 @@ //********************************************************************************************************************** vector SensSpecCommand::setParameters(){ try { - CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist); - CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip); - //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname); - CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff); - CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); - CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist); + CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip); + CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff); + CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); + CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -45,6 +44,21 @@ string SensSpecCommand::getHelpString(){ } } //********************************************************************************************************************** +string SensSpecCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "sensspec") { pattern = "[filename],sensspec"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** SensSpecCommand::SensSpecCommand(){ try { abort = true; calledHelp = true; @@ -62,7 +76,8 @@ SensSpecCommand::SensSpecCommand(){ SensSpecCommand::SensSpecCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; + allLines = 1; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } @@ -115,16 +130,7 @@ SensSpecCommand::SensSpecCommand(string option) { path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["column"] = inputDir + it->second; } - } - - //it = parameters.find("name"); - //user has given a template file - //if(it != parameters.end()){ - //path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - //if (path == "") { parameters["name"] = inputDir + it->second; } - //} - + } } //check for required parameters listFile = validParameter.validFile(parameters, "list", true); @@ -134,16 +140,17 @@ SensSpecCommand::SensSpecCommand(string option) { else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; } } else if (listFile == "not open") { abort = true; } + else { m->setListFile(listFile); } phylipfile = validParameter.validFile(parameters, "phylip", true); if (phylipfile == "not found") { phylipfile = ""; } else if (phylipfile == "not open") { abort = true; } - else { distFile = phylipfile; format = "phylip"; } + else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column", true); if (columnfile == "not found") { columnfile = ""; } else if (columnfile == "not open") { abort = true; } - else { distFile = columnfile; format = "column"; } + else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); } if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? //give priority to column, then phylip @@ -174,23 +181,24 @@ SensSpecCommand::SensSpecCommand(string option) { else if(!m->isTrue(temp)) { hard = 0; } else if(m->isTrue(temp)) { hard = 1; } -// temp = validParameter.validFile(parameters, "name", true); -// if (temp == "not found") { nameFile = ""; } -// else if(temp == "not open") { abort = true; } -// else { nameFile = temp; } -// cout << "name:\t" << nameFile << endl; - temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); // cout << cutoff << endl; temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } - convert(temp, precision); + m->mothurConvert(temp, precision); // cout << precision << endl; - lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; } + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } - sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + ".sensspec"; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile)); + sensSpecFileName = getOutputFileName("sensspec",variables); } } catch(exception& e) { @@ -203,14 +211,27 @@ SensSpecCommand::SensSpecCommand(string option) { int SensSpecCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } - + + int startTime = time(NULL); + + //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file. + string newListFile = preProcessList(); + if (newListFile != "") { listFile = newListFile; } + setUpOutput(); outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName); if(format == "phylip") { processPhylip(); } else if(format == "column") { processColumn(); } + //remove temp file if created + if (newListFile != "") { m->mothurRemove(newListFile); } + + if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; } + + m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine(); + m->mothurOutEndLine(); - m->mothurOut("Output File Name: "); m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(sensSpecFileName); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -222,219 +243,421 @@ int SensSpecCommand::execute(){ exit(1); } } +//*************************************************************************************************************** +bool SensSpecCommand::testFile(){ + try{ + ifstream fileHandle; + m->openInputFile(phylipfile, fileHandle); + + bool square = false; + string numTest, name; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + + char d; + while((d=fileHandle.get()) != EOF){ + if(isalnum(d)){ + square = true; + break; + } + if(d == '\n'){ + square = false; + break; + } + } + fileHandle.close(); + + return square; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "testFile"); + exit(1); + } +} //*************************************************************************************************************** -void SensSpecCommand::processPhylip(){ +int SensSpecCommand::processPhylip(){ try{ //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file - - ifstream inputListFile; - m->openInputFile(listFile, inputListFile); - - string origCutoff = ""; + square = testFile(); + string origCutoff = ""; bool getCutoff = 0; if(cutoff == -1.00) { getCutoff = 1; } else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } - string label; - int numOTUs; - map seqMap; string seqList; - while(inputListFile){ - inputListFile >> label >> numOTUs; - for(int i=0;i> seqList; - int seqListLength = seqList.length(); - string seqName = ""; - for(int j=0;jgetLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if(m->control_pressed){ + for (int i = 0; i < outputNames.size(); i++){ m->mothurRemove(outputNames[i]); } delete list; return 0; + } + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); } - m->gobble(inputListFile); + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } + + lastLabel = list->getLabel(); + + delete list; + list = input.getListVector(); + } + + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + if (list != NULL) { delete list; } + list = input.getListVector(lastLabel); + + //process + fillSeqMap(seqMap, list); + process(seqMap, list->getLabel(), getCutoff, origCutoff); + + delete list; + } - int lNumSeqs = seqMap.size(); - int pNumSeqs = 0; + return 0; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "processPhylip"); + exit(1); + } +} - ifstream phylipFile; - m->openInputFile(distFile, phylipFile); - phylipFile >> pNumSeqs; - if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; } - - string seqName; - double distance; - vector otuIndices(lNumSeqs, -1); +//*************************************************************************************************************** + +int SensSpecCommand::fillSeqMap(map& seqMap, ListVector*& list){ + try { + //for each otu + for(int i=0;igetNumBins();i++){ + + if (m->control_pressed) { return 0; } + + string seqList = list->get(i); + int seqListLength = seqList.length(); + string seqName = ""; + + //parse bin by name, mapping each name to its otu number + for(int j=0;jerrorOut(e, "SensSpecCommand", "fillSeqMap"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::fillSeqPairSet(set& seqPairSet, ListVector*& list){ + try { + int numSeqs = 0; + + //for each otu + for(int i=0;igetNumBins();i++){ + + if (m->control_pressed) { return 0; } + + vector seqNameVector; + string bin = list->get(i); + m->splitAtComma(bin, seqNameVector); + + numSeqs += seqNameVector.size(); + + for(int j=0;jerrorOut(e, "SensSpecCommand", "fillSeqPairSet"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::process(map& seqMap, string label, bool& getCutoff, string& origCutoff){ + try { + + int lNumSeqs = seqMap.size(); + int pNumSeqs = 0; + + ifstream phylipFile; + m->openInputFile(distFile, phylipFile); + phylipFile >> pNumSeqs; + if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ } + + string seqName; + double distance; + vector otuIndices(lNumSeqs, -1); + + truePositives = 0; + falsePositives = 0; + trueNegatives = 0; + falseNegatives = 0; + + if(getCutoff == 1){ + if(label != "unique"){ + origCutoff = label; + convert(label, cutoff); + if(hard == 0){ cutoff += (0.49 / double(precision)); } + } + else{ + origCutoff = "unique"; + cutoff = 0.0000; + } + } + + m->mothurOut(label); m->mothurOutEndLine(); + + for(int i=0;icontrol_pressed) { return 0; } + + phylipFile >> seqName; + otuIndices[i] = seqMap[seqName]; - for(int i=0;i> seqName; - otuIndices[i] = seqMap[seqName]; + for(int j=0;j> distance; - for(int j=0;j> distance; - - if(distance <= cutoff){ - if(otuIndices[i] == otuIndices[j]) { truePositives++; } - else { falseNegatives++; } - } - else{ - if(otuIndices[i] == otuIndices[j]) { falsePositives++; } - else { trueNegatives++; } - } + if(distance <= cutoff){ + if(otuIndices[i] == otuIndices[j]) { truePositives++; } + else { falseNegatives++; } } + else{ + if(otuIndices[i] == otuIndices[j]) { falsePositives++; } + else { trueNegatives++; } + } + } + + if (square) { m->getline(phylipFile); } //get rest of line - redundant distances + m->gobble(phylipFile); + } + phylipFile.close(); + + outputStatistics(label, origCutoff); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "process"); + exit(1); + } +} +//*************************************************************************************************************** +int SensSpecCommand::process(set& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){ + try { + int numDists = (numSeqs * (numSeqs-1) / 2); + + ifstream columnFile; + m->openInputFile(distFile, columnFile); + string seqNameA, seqNameB, seqPairString; + double distance; + + truePositives = 0; + falsePositives = 0; + trueNegatives = numDists; + falseNegatives = 0; + + if(getCutoff == 1){ + if(label != "unique"){ + origCutoff = label; + convert(label, cutoff); + if(hard == 0){ cutoff += (0.49 / double(precision)); } + } + else{ + origCutoff = "unique"; + cutoff = 0.0000; + } + } + + m->mothurOut(label); m->mothurOutEndLine(); + + while(columnFile){ + columnFile >> seqNameA >> seqNameB >> distance; + if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; } + else { seqPairString = seqNameB + '\t' + seqNameA; } + + set::iterator it = seqPairSet.find(seqPairString); + + if(distance <= cutoff){ + if(it != seqPairSet.end()){ + truePositives++; + seqPairSet.erase(it); + } + else{ + falseNegatives++; + } + trueNegatives--; + } + else if(it != seqPairSet.end()){ + falsePositives++; + trueNegatives--; + seqPairSet.erase(it); } - phylipFile.close(); - outputStatistics(label, origCutoff); + m->gobble(columnFile); } - inputListFile.close(); - + falsePositives += seqPairSet.size(); + + outputStatistics(label, origCutoff); + + + return 0; } catch(exception& e) { - m->errorOut(e, "SensSpecCommand", "processPhylip"); + m->errorOut(e, "SensSpecCommand", "process"); exit(1); } } - //*************************************************************************************************************** -void SensSpecCommand::processColumn(){ +int SensSpecCommand::processColumn(){ try{ - ifstream inputListFile; - m->openInputFile(listFile, inputListFile); - string origCutoff = ""; bool getCutoff = 0; if(cutoff == -1.00) { getCutoff = 1; } else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } set seqPairSet; + int numSeqs = 0; + + InputData input(listFile, "list"); + ListVector* list = input.getListVector(); + string lastLabel = list->getLabel(); - string label, seqList; - int numOTUs; - int numSeqs; + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; - while(inputListFile){ - numSeqs = 0; + + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; } - inputListFile >> label >> numOTUs; - for(int i=0;igetLabel()) == 1){ - vector seqNameVector; + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); - inputListFile >> seqList; - int seqListLength = seqList.length(); - string seqName = ""; - for(int j=0;jgobble(inputListFile); - - int numDists = (numSeqs * (numSeqs-1) / 2); - - ifstream columnFile; - m->openInputFile(distFile, columnFile); - string seqNameA, seqNameB, seqPairString; - double distance; - - truePositives = 0; - falsePositives = 0; - trueNegatives = numDists; - falseNegatives = 0; - - if(getCutoff == 1){ - if(label != "unique"){ - origCutoff = label; - convert(label, cutoff); - if(hard == 0){ cutoff += (0.49 / double(precision)); } - } - else{ - origCutoff = "unique"; - cutoff = 0.0000; - } + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); } - cout << label << endl; + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } - while(columnFile){ - columnFile >> seqNameA >> seqNameB >> distance; - if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; } - else { seqPairString = seqNameB + '\t' + seqNameA; } - - set::iterator it = seqPairSet.find(seqPairString); + lastLabel = list->getLabel(); - if(distance <= cutoff){ - if(it != seqPairSet.end()){ - truePositives++; - seqPairSet.erase(it); - } - else{ - falseNegatives++; - } - trueNegatives--; - } - else if(it != seqPairSet.end()){ - falsePositives++; - trueNegatives--; - seqPairSet.erase(it); - } - - m->gobble(columnFile); + delete list; + list = input.getListVector(); + } + + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } - falsePositives += seqPairSet.size(); + } + + //run last label if you need to + if (needToRun == true) { + if (list != NULL) { delete list; } + list = input.getListVector(lastLabel); - outputStatistics(label, origCutoff); + //process + numSeqs = fillSeqPairSet(seqPairSet, list); + delete list; + process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); } + + return 0; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "processColumn"); @@ -507,6 +730,114 @@ void SensSpecCommand::outputStatistics(string label, string cutoff){ exit(1); } } +//*************************************************************************************************************** + +string SensSpecCommand::preProcessList(){ + try { + set uniqueNames; + //get unique names from distance file + if (format == "phylip") { + + ifstream phylipFile; + m->openInputFile(distFile, phylipFile); + string numTest; + int pNumSeqs; + phylipFile >> numTest; m->gobble(phylipFile); + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { + m->mothurConvert(numTest, pNumSeqs); + } + + string seqName; + for(int i=0;icontrol_pressed) { return ""; } + phylipFile >> seqName; m->getline(phylipFile); m->gobble(phylipFile); + uniqueNames.insert(seqName); + } + phylipFile.close(); + }else { + ifstream columnFile; + m->openInputFile(distFile, columnFile); + string seqNameA, seqNameB; + double distance; + + while(columnFile){ + if (m->control_pressed) { return ""; } + columnFile >> seqNameA >> seqNameB >> distance; + uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB); + m->gobble(columnFile); + } + columnFile.close(); + } + + //read list file, if numSeqs > unique names then remove redundant names + string newListFile = listFile + ".temp"; + ofstream out; + m->openOutputFile(newListFile, out); + ifstream in; + m->openInputFile(listFile, in); + + bool wroteSomething = false; + + while(!in.eof()){ + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; } + + //read in list vector + ListVector list(in); + + //listfile is already unique + if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; } + + //make a new list vector + ListVector newList; + newList.setLabel(list.getLabel()); + + //for each bin + for (int i = 0; i < list.getNumBins(); i++) { + + //parse out names that are in accnos file + string binnames = list.get(i); + vector bnames; + m->splitAtComma(binnames, bnames); + + string newNames = ""; + for (int j = 0; j < bnames.size(); j++) { + string name = bnames[j]; + //if that name is in the .accnos file, add it + if (uniqueNames.count(name) != 0) { newNames += name + ","; } + } + + //if there are names in this bin add to new list + if (newNames != "") { + newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma + newList.push_back(newNames); + } + } + + //print new listvector + if (newList.getNumBins() != 0) { + wroteSomething = true; + newList.print(out); + } + + m->gobble(in); + } + in.close(); + out.close(); + + if (wroteSomething) { return newListFile; } + else { m->mothurRemove(newListFile); } + + return ""; + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "preProcessList"); + exit(1); + } +} + //***************************************************************************************************************