X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mgclustercommand.cpp;h=508b05f01d432a83d387b9b5b072d59a4b0fdcd5;hb=9eb79e6942cf98f3a0296ff9f63b4a47731b49e8;hp=5a790afd45bbfcc2c92d23827aa58a184f257cee;hpb=e150b0b0664caec517485ee6d69dcdade6dcae77;p=mothur.git diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index 5a790af..508b05f 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -14,6 +14,7 @@ vector MGClusterCommand::setParameters(){ try { CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); + CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge); CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength); CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty); CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff); @@ -61,6 +62,28 @@ string MGClusterCommand::getHelpString(){ } } //********************************************************************************************************************** +string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "list") { outputFileName = "list"; } + else if (type == "rabund") { outputFileName = "rabund"; } + else if (type == "sabund") { outputFileName = "sabund"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** MGClusterCommand::MGClusterCommand(){ try { abort = true; calledHelp = true; @@ -141,17 +164,21 @@ MGClusterCommand::MGClusterCommand(string option) { namefile = validParameter.validFile(parameters, "name", true); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } + else { m->setNameFile(namefile); } if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; } //check for optional parameter and set defaults string temp; + temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "false"; } + large = m->isTrue(temp); + temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } precisionLength = temp.length(); - convert(temp, precision); + m->mothurConvert(temp, precision); temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); cutoff += (5 / (precision * 10.0)); method = validParameter.validFile(parameters, "method", false); @@ -161,10 +188,10 @@ MGClusterCommand::MGClusterCommand(string option) { else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; } temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; } - convert(temp, length); + m->mothurConvert(temp, length); temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; } - convert(temp, penalty); + m->mothurConvert(temp, penalty); temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; } minWanted = m->isTrue(temp); @@ -176,7 +203,7 @@ MGClusterCommand::MGClusterCommand(string option) { hclusterWanted = m->isTrue(temp); temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; } - hard = m->isTrue(temp); + hard = m->isTrue(temp); } } @@ -188,7 +215,7 @@ MGClusterCommand::MGClusterCommand(string option) { //********************************************************************************************************************** int MGClusterCommand::execute(){ try { - + cout << "1" << endl; if (abort == true) { if (calledHelp) { return 0; } return 2; } //read names file @@ -201,15 +228,27 @@ int MGClusterCommand::execute(){ string tag = ""; time_t start; float previousDist = 0.00000; - float rndPreviousDist = 0.00000; - + float rndPreviousDist = 0.00000; + //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector //must remember to delete those objects here since readBlast does not read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted); read->read(nameMap); - - list = new ListVector(nameMap->getListVector()); - RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); + + list = new ListVector(nameMap->getListVector()); + RAbundVector* rabund = NULL; + + if(large) { + map nameMapCounts = m->readNames(namefile); + createRabund(nameMapCounts); + rabund = &rav; + }else { + rabund = new RAbundVector(list->getRAbundVector()); + } + + + //list = new ListVector(nameMap->getListVector()); + //rabund = new RAbundVector(list->getRAbundVector()); if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; } @@ -222,18 +261,23 @@ int MGClusterCommand::execute(){ else if (method == "nearest") { tag = "nn"; } else { tag = "an"; } - //open output files - m->openOutputFile(fileroot+ tag + ".list", listFile); - m->openOutputFile(fileroot+ tag + ".rabund", rabundFile); - m->openOutputFile(fileroot+ tag + ".sabund", sabundFile); + string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund"); + string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund"); + string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list"); + + m->openOutputFile(sabundFileName, sabundFile); + m->openOutputFile(rabundFileName, rabundFile); + m->openOutputFile(listFileName, listFile); if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } + double saveCutoff = cutoff; + if (!hclusterWanted) { //get distmatrix and overlap SparseMatrix* distMatrix = read->getDistMatrix(); @@ -250,7 +294,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } @@ -262,7 +306,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } @@ -285,7 +329,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } @@ -316,7 +360,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } @@ -346,7 +390,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } @@ -363,7 +407,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } @@ -372,11 +416,16 @@ int MGClusterCommand::execute(){ seqs = hcluster->getSeqs(); + //to account for cutoff change in average neighbor + if (seqs.size() != 0) { + if (seqs[0].dist > cutoff) { break; } + } + if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -385,13 +434,13 @@ int MGClusterCommand::execute(){ if (seqs[i].seq1 != seqs[i].seq2) { - hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist); + cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist); if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -413,9 +462,9 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -449,9 +498,9 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); - remove(distFile.c_str()); - remove(overlapFile.c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } @@ -466,30 +515,37 @@ int MGClusterCommand::execute(){ } delete hcluster; - remove(distFile.c_str()); - remove(overlapFile.c_str()); + m->mothurRemove(distFile); + m->mothurRemove(overlapFile); } - delete list; - delete rabund; + delete list; + if (!large) {delete rabund;} listFile.close(); sabundFile.close(); rabundFile.close(); if (m->control_pressed) { delete nameMap; - listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str()); + listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list"); - m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund"); - m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund"); + m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); + m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); + m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); m->mothurOutEndLine(); + if (saveCutoff != cutoff) { + if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); } + else { saveCutoff = m->roundDist(saveCutoff, precision); } + + m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); + } + //set list file as new current listfile string current = ""; itTypes = outputTypes.find("list"); @@ -643,12 +699,12 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve try { //sort distFile string sortedDistFile = m->sortFile(unsortedDist, outputDir); - remove(unsortedDist.c_str()); //delete unsorted file + m->mothurRemove(unsortedDist); //delete unsorted file distFile = sortedDistFile; //sort overlap file string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir); - remove(unsortedOverlap.c_str()); //delete unsorted file + m->mothurRemove(unsortedOverlap); //delete unsorted file overlapFile = sortedOverlapFile; } catch(exception& e) { @@ -659,7 +715,22 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve //********************************************************************************************************************** +void MGClusterCommand::createRabund(map nameMapCounts){ + try { + //RAbundVector rav; + map::iterator it; + //it = nameMapCounts.begin(); + //for(int i = 0; i < list->getNumBins(); i++) { rav.push_back((*it).second); it++; } + for ( it=nameMapCounts.begin(); it!=nameMapCounts.end(); it++ ) { rav.push_back( it->second ); } + //return rav; + } + catch(exception& e) { + m->errorOut(e, "MGClusterCommand", "createRabund"); + exit(1); + } + +} - +//**********************************************************************************************************************