X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=clustersplitcommand.cpp;h=95693cc420a7e55d5c9a96a102e3d9e55ad0221c;hp=87d26cea75033c911d4db315265123fdd3dd49de;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=eb71e28b7b7afd82540f4a8f0bac9429c5b9d713 diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index 87d26ce..95693cc 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -61,7 +61,7 @@ string ClusterSplitCommand::getHelpString(){ helpString += "The cluster parameter allows you to indicate whether you want to run the clustering or just split the distance matrix, default=t"; helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n"; helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n"; - helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n"; + helpString += "The method allows you to specify what clustering algorithm you want to use, default=average, option furthest, nearest, or average. \n"; helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n"; helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n"; helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n"; @@ -442,8 +442,7 @@ int ClusterSplitCommand::execute(){ if (m->debug) { m->mothurOut("[DEBUG]: distName.size() = " + toString(distName.size()) + ".\n"); } //output a merged distance file - if (splitmethod == "fasta") { createMergedDistanceFile(distName); } - + //if (splitmethod == "fasta") { createMergedDistanceFile(distName); } if (m->control_pressed) { return 0; } @@ -844,9 +843,12 @@ int ClusterSplitCommand::mergeLists(vector listNames, map us if (itLabel->first == -1) { thisLabel = "unique"; } else { thisLabel = toString(itLabel->first, length-1); } - outList << thisLabel << '\t' << itLabel->second << '\t'; + //outList << thisLabel << '\t' << itLabel->second << '\t'; RAbundVector* rabund = NULL; + ListVector completeList; + completeList.setLabel(thisLabel); + if (countfile == "") { rabund = new RAbundVector(); rabund->setLabel(thisLabel); @@ -855,7 +857,8 @@ int ClusterSplitCommand::mergeLists(vector listNames, map us //add in singletons if (listSingle != NULL) { for (int j = 0; j < listSingle->getNumBins(); j++) { - outList << listSingle->get(j) << '\t'; + //outList << listSingle->get(j) << '\t'; + completeList.push_back(listSingle->get(j)); if (countfile == "") { rabund->push_back(m->getNumNames(listSingle->get(j))); } } } @@ -872,7 +875,8 @@ int ClusterSplitCommand::mergeLists(vector listNames, map us if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine(); } else { for (int j = 0; j < list->getNumBins(); j++) { - outList << list->get(j) << '\t'; + //outList << list->get(j) << '\t'; + completeList.push_back(list->get(j)); if (countfile == "") { rabund->push_back(m->getNumNames(list->get(j))); } } delete list; @@ -885,7 +889,8 @@ int ClusterSplitCommand::mergeLists(vector listNames, map us sabund.print(outSabund); rabund->print(outRabund); } - outList << endl; + //outList << endl; + completeList.print(outList); if (rabund != NULL) { delete rabund; } } @@ -947,7 +952,7 @@ vector ClusterSplitCommand::createProcesses(vector< map if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); } } - //not lets reverse the order of ever other process, so we balance big files running with little ones + //now lets reverse the order of ever other process, so we balance big files running with little ones for (int i = 0; i < processors; i++) { //cout << i << endl; int remainder = ((i+1) % processors); @@ -1169,7 +1174,7 @@ string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisN cluster->readPhylipFile(thisDistFile, nameMap); }else if (countfile != "") { ct = new CountTable(); - ct->readTable(thisNamefile); + ct->readTable(thisNamefile, false); cluster->readPhylipFile(thisDistFile, ct); } tag = cluster->getTag(); @@ -1297,7 +1302,7 @@ string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile read->read(nameMap); }else if (countfile != "") { ct = new CountTable(); - ct->readTable(thisNamefile); + ct->readTable(thisNamefile, false); read->read(ct); }else { read->read(nameMap); } @@ -1325,9 +1330,10 @@ string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine(); //create cluster - if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); } - else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); } - else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); } + float adjust = -1.0; + if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); } + else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); } + else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust); } tag = cluster->getTag(); if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }