X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=mgclustercommand.cpp;h=24c4f27777d21ed88b6f3b41411394502802404e;hp=a0f33d6f8bba8c2976b0a969924c4082c98ca67a;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=e98d569d630c30d1ac3608eb6337bcec4765a724 diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index a0f33d6..24c4f27 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -12,19 +12,20 @@ //********************************************************************************************************************** vector MGClusterCommand::setParameters(){ try { - CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength); - CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty); - CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff); - CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); - CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod); - CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard); - CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin); - CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge); - CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength); + CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty); + CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff); + CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); + CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod); + CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard); + CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin); + CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge); + CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -61,28 +62,23 @@ string MGClusterCommand::getHelpString(){ } } //********************************************************************************************************************** -string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string MGClusterCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "list") { outputFileName = "list"; } - else if (type == "rabund") { outputFileName = "rabund"; } - else if (type == "sabund") { outputFileName = "sabund"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } + else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; } + else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "MGClusterCommand", "getOutputPattern"); + exit(1); + } } -//********************************************************************************************************************** +//******************************************************************************************************************* MGClusterCommand::MGClusterCommand(){ try { abort = true; calledHelp = true; @@ -146,6 +142,14 @@ MGClusterCommand::MGClusterCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } @@ -164,12 +168,19 @@ MGClusterCommand::MGClusterCommand(string option) { if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { m->setNameFile(namefile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; } if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; } //check for optional parameter and set defaults string temp; - temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } precisionLength = temp.length(); m->mothurConvert(temp, precision); @@ -199,7 +210,7 @@ MGClusterCommand::MGClusterCommand(string option) { hclusterWanted = m->isTrue(temp); temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; } - hard = m->isTrue(temp); + hard = m->isTrue(temp); } } @@ -211,7 +222,6 @@ MGClusterCommand::MGClusterCommand(string option) { //********************************************************************************************************************** int MGClusterCommand::execute(){ try { - if (abort == true) { if (calledHelp) { return 0; } return 2; } //read names file @@ -225,7 +235,7 @@ int MGClusterCommand::execute(){ time_t start; float previousDist = 0.00000; float rndPreviousDist = 0.00000; - + //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector //must remember to delete those objects here since readBlast does not read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted); @@ -233,16 +243,20 @@ int MGClusterCommand::execute(){ list = new ListVector(nameMap->getListVector()); RAbundVector* rabund = NULL; - if(large) { - map nameMapCounts = m->readNames(namefile); - RAbundVector* rabund = createRabund(list, nameMapCounts); + + if(countfile != "") { + //map nameMapCounts = m->readNames(namefile); + ct = new CountTable(); + ct->readTable(countfile); + rabund = new RAbundVector(); + createRabund(ct, list, rabund); }else { - RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); + rabund = new RAbundVector(list->getRAbundVector()); } //list = new ListVector(nameMap->getListVector()); - //RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); + //rabund = new RAbundVector(list->getRAbundVector()); if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; } @@ -255,17 +269,23 @@ int MGClusterCommand::execute(){ else if (method == "nearest") { tag = "nn"; } else { tag = "an"; } - string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund"); - string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund"); - string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list"); + map variables; + variables["[filename]"] = fileroot; + variables["[clustertag]"] = tag; + string sabundFileName = getOutputFileName("sabund", variables); + string rabundFileName = getOutputFileName("rabund", variables); + if (countfile != "") { variables["[tag2]"] = "unique_list"; } + string listFileName = getOutputFileName("list", variables); - m->openOutputFile(sabundFileName, sabundFile); - m->openOutputFile(rabundFileName, rabundFile); + if (countfile == "") { + m->openOutputFile(sabundFileName, sabundFile); + m->openOutputFile(rabundFileName, rabundFile); + } m->openOutputFile(listFileName, listFile); if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -274,7 +294,7 @@ int MGClusterCommand::execute(){ if (!hclusterWanted) { //get distmatrix and overlap - SparseMatrix* distMatrix = read->getDistMatrix(); + SparseDistanceMatrix* distMatrix = read->getDistMatrix(); overlapMatrix = read->getOverlapMatrix(); //already sorted by read delete read; @@ -288,19 +308,22 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } - + + //cluster using cluster classes while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){ + if (m->debug) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; } + cluster->update(cutoff); if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -323,7 +346,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -354,7 +377,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -384,7 +407,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -401,7 +424,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -417,7 +440,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); @@ -432,7 +455,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); @@ -456,7 +479,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); @@ -492,7 +515,7 @@ int MGClusterCommand::execute(){ if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); @@ -513,15 +536,16 @@ int MGClusterCommand::execute(){ m->mothurRemove(overlapFile); } - delete list; + delete list; delete rabund; listFile.close(); - sabundFile.close(); - rabundFile.close(); - + if (countfile == "") { + sabundFile.close(); + rabundFile.close(); + } if (m->control_pressed) { delete nameMap; - listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); + listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } @@ -529,8 +553,10 @@ int MGClusterCommand::execute(){ m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); - m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); - m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); + if (countfile == "") { + m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); + m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); + } m->mothurOutEndLine(); if (saveCutoff != cutoff) { @@ -573,12 +599,14 @@ int MGClusterCommand::execute(){ void MGClusterCommand::printData(ListVector* mergedList){ try { mergedList->print(listFile); - mergedList->getRAbundVector().print(rabundFile); - - SAbundVector sabund = mergedList->getSAbundVector(); + SAbundVector sabund = mergedList->getSAbundVector(); + + if (countfile == "") { + mergedList->getRAbundVector().print(rabundFile); + sabund.print(sabundFile); + } sabund.print(cout); - sabund.print(sabundFile); } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "printData"); @@ -709,10 +737,31 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve //********************************************************************************************************************** -RAbundVector MGClusterCommand::createRabund(ListVector list, map nameMapCounts){ - for(int i = 0; i < list->getNumBins(); i++) { - +void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){ + try { + //vector names = ct.getNamesOfSeqs(); + + //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); } + //return rav; + + for(int i = 0; i < list->getNumBins(); i++) { + vector binNames; + string bin = list->get(i); + m->splitAtComma(bin, binNames); + int total = 0; + for (int j = 0; j < binNames.size(); j++) { + total += ct->getNumSeqs(binNames[j]); + } + rabund->push_back(total); + } + + } + catch(exception& e) { + m->errorOut(e, "MGClusterCommand", "createRabund"); + exit(1); + } + } //**********************************************************************************************************************