X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mgclustercommand.cpp;h=477450475d479eb4308d9c2503086b9d7c03b051;hb=01f8d2c7d982a6209211f5abbcf2a086fdf60d0a;hp=945d116c035d663eff0a3bc2317fda9ba643b908;hpb=e51cb7e261265800fa2d2831b6516e33ebc7d78b;p=mothur.git diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index 945d116..4774504 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -14,6 +14,7 @@ vector MGClusterCommand::setParameters(){ try { CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount); CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength); CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty); CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff); @@ -61,6 +62,28 @@ string MGClusterCommand::getHelpString(){ } } //********************************************************************************************************************** +string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "list") { outputFileName = "list"; } + else if (type == "rabund") { outputFileName = "rabund"; } + else if (type == "sabund") { outputFileName = "sabund"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** MGClusterCommand::MGClusterCommand(){ try { abort = true; calledHelp = true; @@ -82,6 +105,7 @@ MGClusterCommand::MGClusterCommand(string option) { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { vector myArray = setParameters(); @@ -140,17 +164,25 @@ MGClusterCommand::MGClusterCommand(string option) { namefile = validParameter.validFile(parameters, "name", true); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } + else { m->setNameFile(namefile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; } if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; } //check for optional parameter and set defaults string temp; - temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } precisionLength = temp.length(); - convert(temp, precision); + m->mothurConvert(temp, precision); temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); cutoff += (5 / (precision * 10.0)); method = validParameter.validFile(parameters, "method", false); @@ -160,10 +192,10 @@ MGClusterCommand::MGClusterCommand(string option) { else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; } temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; } - convert(temp, length); + m->mothurConvert(temp, length); temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; } - convert(temp, penalty); + m->mothurConvert(temp, penalty); temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; } minWanted = m->isTrue(temp); @@ -175,7 +207,7 @@ MGClusterCommand::MGClusterCommand(string option) { hclusterWanted = m->isTrue(temp); temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; } - hard = m->isTrue(temp); + hard = m->isTrue(temp); } } @@ -187,7 +219,6 @@ MGClusterCommand::MGClusterCommand(string option) { //********************************************************************************************************************** int MGClusterCommand::execute(){ try { - if (abort == true) { if (calledHelp) { return 0; } return 2; } //read names file @@ -200,15 +231,29 @@ int MGClusterCommand::execute(){ string tag = ""; time_t start; float previousDist = 0.00000; - float rndPreviousDist = 0.00000; - + float rndPreviousDist = 0.00000; + //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector //must remember to delete those objects here since readBlast does not read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted); read->read(nameMap); - - list = new ListVector(nameMap->getListVector()); - RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); + + list = new ListVector(nameMap->getListVector()); + RAbundVector* rabund = NULL; + + if(countfile != "") { + //map nameMapCounts = m->readNames(namefile); + ct = new CountTable(); + ct->readTable(countfile); + rabund = new RAbundVector(); + createRabund(ct, list, rabund); + }else { + rabund = new RAbundVector(list->getRAbundVector()); + } + + + //list = new ListVector(nameMap->getListVector()); + //rabund = new RAbundVector(list->getRAbundVector()); if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; } @@ -221,21 +266,30 @@ int MGClusterCommand::execute(){ else if (method == "nearest") { tag = "nn"; } else { tag = "an"; } - //open output files - m->openOutputFile(fileroot+ tag + ".list", listFile); - m->openOutputFile(fileroot+ tag + ".rabund", rabundFile); - m->openOutputFile(fileroot+ tag + ".sabund", sabundFile); + string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund"); + string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund"); + string listFileName = fileroot+ tag + "."; + if (countfile != "") { listFileName += "unique_"; } + listFileName += getOutputFileNameTag("list"); + + if (countfile == "") { + m->openOutputFile(sabundFileName, sabundFile); + m->openOutputFile(rabundFileName, rabundFile); + } + m->openOutputFile(listFileName, listFile); if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } + double saveCutoff = cutoff; + if (!hclusterWanted) { //get distmatrix and overlap - SparseMatrix* distMatrix = read->getDistMatrix(); + SparseDistanceMatrix* distMatrix = read->getDistMatrix(); overlapMatrix = read->getOverlapMatrix(); //already sorted by read delete read; @@ -249,7 +303,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -261,7 +315,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -284,7 +338,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -315,7 +369,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -345,7 +399,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -362,7 +416,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -371,11 +425,16 @@ int MGClusterCommand::execute(){ seqs = hcluster->getSeqs(); + //to account for cutoff change in average neighbor + if (seqs.size() != 0) { + if (seqs[0].dist > cutoff) { break; } + } + if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -384,13 +443,13 @@ int MGClusterCommand::execute(){ if (seqs[i].seq1 != seqs[i].seq2) { - hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist); + cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist); if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -412,9 +471,9 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -448,9 +507,9 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -465,30 +524,40 @@ int MGClusterCommand::execute(){ } delete hcluster; - remove(distFile.c_str()); - remove(overlapFile.c_str()); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); } - delete list; + delete list; delete rabund; listFile.close(); - sabundFile.close(); - rabundFile.close(); - + if (countfile == "") { + sabundFile.close(); + rabundFile.close(); + } if (m->control_pressed) { delete nameMap; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list"); - m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund"); - m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund"); + m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); + if (countfile == "") { + m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); + m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); + } m->mothurOutEndLine(); + if (saveCutoff != cutoff) { + if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); } + else { saveCutoff = m->roundDist(saveCutoff, precision); } + + m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); + } + //set list file as new current listfile string current = ""; itTypes = outputTypes.find("list"); @@ -522,12 +591,14 @@ int MGClusterCommand::execute(){ void MGClusterCommand::printData(ListVector* mergedList){ try { mergedList->print(listFile); - mergedList->getRAbundVector().print(rabundFile); - - SAbundVector sabund = mergedList->getSAbundVector(); + SAbundVector sabund = mergedList->getSAbundVector(); + + if (countfile == "") { + mergedList->getRAbundVector().print(rabundFile); + sabund.print(sabundFile); + } sabund.print(cout); - sabund.print(sabundFile); } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "printData"); @@ -642,12 +713,12 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve try { //sort distFile string sortedDistFile = m->sortFile(unsortedDist, outputDir); - remove(unsortedDist.c_str()); //delete unsorted file + m->mothurRemove(unsortedDist); //delete unsorted file distFile = sortedDistFile; //sort overlap file string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir); - remove(unsortedOverlap.c_str()); //delete unsorted file + m->mothurRemove(unsortedOverlap); //delete unsorted file overlapFile = sortedOverlapFile; } catch(exception& e) { @@ -658,7 +729,33 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve //********************************************************************************************************************** +void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){ + try { + //vector names = ct.getNamesOfSeqs(); + //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); } + //return rav; + + for(int i = 0; i < list->getNumBins(); i++) { + vector binNames; + string bin = list->get(i); + m->splitAtComma(bin, binNames); + int total = 0; + for (int j = 0; j < binNames.size(); j++) { + total += ct->getNumSeqs(binNames[j]); + } + rabund->push_back(total); + } + + + } + catch(exception& e) { + m->errorOut(e, "MGClusterCommand", "createRabund"); + exit(1); + } + +} +//**********************************************************************************************************************