From 86b6cc7ce1ec7fce12cdfdd6225de4dee7cfbdbf Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 3 Jun 2010 17:02:17 +0000 Subject: [PATCH] fixed cluster.split command --- chopseqscommand.cpp | 2 +- clustersplitcommand.cpp | 221 ++++++++++++++++++++++++++-------------- clustersplitcommand.h | 3 +- readtree.cpp | 3 +- tree.cpp | 4 + 5 files changed, 151 insertions(+), 82 deletions(-) diff --git a/chopseqscommand.cpp b/chopseqscommand.cpp index 9020d2b..47f151d 100644 --- a/chopseqscommand.cpp +++ b/chopseqscommand.cpp @@ -77,7 +77,7 @@ ChopSeqsCommand::ChopSeqsCommand(string option) { void ChopSeqsCommand::help(){ try { - m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta with sequences trimmed to the end position.\n"); + m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences.\n"); m->mothurOut("The chop.seqs command parameters are fasta, end and fromend, fasta is required.\n"); m->mothurOut("The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, end=yourEnd).\n"); m->mothurOut("The end parameter allows you to specify an end base position for your sequences, default = 0.\n"); diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index ed995fc..d10a51f 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -313,8 +313,12 @@ int ClusterSplitCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; } //****************** merge list file and create rabund and sabund files ******************************// - - mergeLists(listFileNames, singletonName, labels); + ListVector* listSingle; + map labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins + + if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + mergeLists(listFileNames, labelBins, listSingle); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } @@ -333,21 +337,14 @@ int ClusterSplitCommand::execute(){ } } //********************************************************************************************************************** -int ClusterSplitCommand::mergeLists(vector listNames, string singleton, set userLabels){ +map ClusterSplitCommand::completeListFile(vector listNames, string singleton, set userLabels, ListVector*& listSingle){ try { - if (outputDir == "") { outputDir += hasPath(distfile); } - fileroot = outputDir + getRootName(getSimpleName(distfile)); - - openOutputFile(fileroot+ tag + ".sabund", outSabund); - openOutputFile(fileroot+ tag + ".rabund", outRabund); - openOutputFile(fileroot+ tag + ".list", outList); - outputNames.push_back(fileroot+ tag + ".sabund"); - outputNames.push_back(fileroot+ tag + ".rabund"); - outputNames.push_back(fileroot+ tag + ".list"); + map labelBin; + vector orderFloat; + int numSingleBins; //read in singletons - ListVector* listSingle = NULL; if (singleton != "none") { ifstream in; openInputFile(singleton, in); @@ -359,102 +356,167 @@ int ClusterSplitCommand::mergeLists(vector listNames, string singleton, listSingle->push_back(secondCol); } in.close(); - } + remove(singleton.c_str()); + + numSingleBins = listSingle->getNumBins(); + }else{ listSingle = NULL; numSingleBins = 0; } - vector orderFloat; - //go through users set and make them floats so we can sort them for(set::iterator it = userLabels.begin(); it != userLabels.end(); ++it) { - float temp; + float temp = -10.0; - if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){ - convert(*it, temp); - orderFloat.push_back(temp); - }else if (*it == "unique") { orderFloat.push_back(-1.0); } - else { - userLabels.erase(*it); - it--; - } + if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) { convert(*it, temp); } + else if (*it == "unique") { temp = -1.0; } + + orderFloat.push_back(temp); + labelBin[temp] = numSingleBins; //initialize numbins } //sort order sort(orderFloat.begin(), orderFloat.end()); - - vector inputs; - vector lastLabels; - for (int i = 0; i < listNames.size(); i++) { - InputData* input = new InputData(listNames[i], "list"); - inputs.push_back(input); + userLabels.clear(); - ifstream in; - openInputFile(listNames[i], in); - ListVector tempList(in); - lastLabels.push_back(tempList.getLabel()); - in.close(); - } + //get the list info from each file + for (int k = 0; k < listNames.size(); k++) { - ListVector* merged = NULL; - - //for each label needed - for(int l = 0; l < orderFloat.size(); l++){ + if (m->control_pressed) { + if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str()); } + for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); } + return labelBin; + } - string thisLabel; - if (orderFloat[l] == -1) { thisLabel = "unique"; } - else { thisLabel = toString(orderFloat[l], length-1); } - - //get the list info from each file - for (int k = 0; k < listNames.size(); k++) { + InputData* input = new InputData(listNames[k], "list"); + ListVector* list = input->getListVector(); + string lastLabel = list->getLabel(); + + string filledInList = listNames[k] + "filledInTemp"; + ofstream outFilled; + openOutputFile(filledInList, outFilled); - if (m->control_pressed) { - if (listSingle != NULL) { delete listSingle; remove(singleton.c_str()); } - for (int i = 0; i < listNames.size(); i++) { delete inputs[i]; remove(listNames[i].c_str()); } - delete merged; merged = NULL; - return 0; - } - - ListVector* list = inputs[k]->getListVector(); - + //for each label needed + for(int l = 0; l < orderFloat.size(); l++){ + + string thisLabel; + if (orderFloat[l] == -1) { thisLabel = "unique"; } + else { thisLabel = toString(orderFloat[l], length-1); } + //this file has reached the end - if (list == NULL) { list = inputs[k]->getListVector(lastLabels[k], true); } + if (list == NULL) { + list = input->getListVector(lastLabel, true); + }else{ //do you have the distance, or do you need to fill in - float labelFloat; - if (list->getLabel() == "unique") { labelFloat = -1.0; } - else { convert(list->getLabel(), labelFloat); } - - //check for missing labels - if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one - //if its bigger get last label, otherwise keep it - delete list; - list = inputs[k]->getListVector(lastLabels[k], true); //get last list vector to use, you actually want to move back in the file - } - lastLabels[k] = list->getLabel(); + float labelFloat; + if (list->getLabel() == "unique") { labelFloat = -1.0; } + else { convert(list->getLabel(), labelFloat); } - //is this the first file - if (merged == NULL) { merged = new ListVector(); merged->setLabel(thisLabel); } - - for (int j = 0; j < list->getNumBins(); j++) { - merged->push_back(list->get(j)); + //check for missing labels + if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one + //if its bigger get last label, otherwise keep it + delete list; + list = input->getListVector(lastLabel, true); //get last list vector to use, you actually want to move back in the file + } + lastLabel = list->getLabel(); } + //print to new file + list->setLabel(thisLabel); + list->print(outFilled); + + //update labelBin + labelBin[orderFloat[l]] += list->getNumBins(); + delete list; + + list = input->getListVector(); } + if (list != NULL) { delete list; } + delete input; + + outFilled.close(); + remove(listNames[k].c_str()); + rename(filledInList.c_str(), listNames[k].c_str()); + } + + return labelBin; + } + catch(exception& e) { + m->errorOut(e, "ClusterSplitCommand", "completeListFile"); + exit(1); + } +} +//********************************************************************************************************************** +int ClusterSplitCommand::mergeLists(vector listNames, map userLabels, ListVector* listSingle){ + try { + if (outputDir == "") { outputDir += hasPath(distfile); } + fileroot = outputDir + getRootName(getSimpleName(distfile)); + + openOutputFile(fileroot+ tag + ".sabund", outSabund); + openOutputFile(fileroot+ tag + ".rabund", outRabund); + openOutputFile(fileroot+ tag + ".list", outList); + + outputNames.push_back(fileroot+ tag + ".sabund"); + outputNames.push_back(fileroot+ tag + ".rabund"); + outputNames.push_back(fileroot+ tag + ".list"); + + map::iterator itLabel; + + //for each label needed + for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) { + + string thisLabel; + if (itLabel->first == -1) { thisLabel = "unique"; } + else { thisLabel = toString(itLabel->first, length-1); } + + outList << thisLabel << '\t' << itLabel->second << '\t'; + + RAbundVector* rabund = new RAbundVector(); + rabund->setLabel(thisLabel); + //add in singletons if (listSingle != NULL) { for (int j = 0; j < listSingle->getNumBins(); j++) { - merged->push_back(listSingle->get(j)); + outList << listSingle->get(j) << '\t'; + rabund->push_back(getNumNames(listSingle->get(j))); } } - //print to files - printData(merged); + //get the list info from each file + for (int k = 0; k < listNames.size(); k++) { + + if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); } delete rabund; return 0; } + + InputData* input = new InputData(listNames[k], "list"); + ListVector* list = input->getListVector(thisLabel); + + //this file has reached the end + if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine(); } + else { + for (int j = 0; j < list->getNumBins(); j++) { + outList << list->get(j) << '\t'; + rabund->push_back(getNumNames(list->get(j))); + } + delete list; + } + delete input; + } + + SAbundVector sabund = rabund->getSAbundVector(); + + sabund.print(outSabund); + rabund->print(outRabund); + outList << endl; - delete merged; merged = NULL; + delete rabund; } - if (listSingle != NULL) { delete listSingle; remove(singleton.c_str()); } + outList.close(); + outRabund.close(); + outSabund.close(); - for (int i = 0; i < listNames.size(); i++) { delete inputs[i]; remove(listNames[i].c_str()); } + if (listSingle != NULL) { delete listSingle; } + + for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); } return 0; } @@ -463,6 +525,7 @@ int ClusterSplitCommand::mergeLists(vector listNames, string singleton, exit(1); } } + //********************************************************************************************************************** void ClusterSplitCommand::printData(ListVector* oldList){ diff --git a/clustersplitcommand.h b/clustersplitcommand.h index e838bc4..05ae8b8 100644 --- a/clustersplitcommand.h +++ b/clustersplitcommand.h @@ -42,7 +42,8 @@ private: void printData(ListVector*); int createProcesses(vector < vector < map > >); vector cluster(vector< map >, set&); - int mergeLists(vector, string, set); + int mergeLists(vector, map, ListVector*); + map completeListFile(vector, string, set, ListVector*&); }; #endif diff --git a/readtree.cpp b/readtree.cpp index 3062f66..0d25f7e 100644 --- a/readtree.cpp +++ b/readtree.cpp @@ -230,7 +230,7 @@ int ReadNewickTree::readTreeString() { lc = readNewickInt(filehandle, n, T); if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading - + if(filehandle.peek()==','){ readSpecialChar(filehandle,',',"comma"); } @@ -291,6 +291,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) { if(c == '('){ int lc = readNewickInt(f, n, T); if (lc == -1) { return -1; } //reports an error in reading + readSpecialChar(f,',',"comma"); int rc = readNewickInt(f, n, T); diff --git a/tree.cpp b/tree.cpp index b6cfa0b..d6634bc 100644 --- a/tree.cpp +++ b/tree.cpp @@ -713,6 +713,10 @@ void Tree::parseTreeFile() { } } filehandle.close(); + + for (int i = 0; i < globaldata->Treenames.size(); i++) { +cout << globaldata->Treenames[i] << endl; } +cout << "done" << endl; } catch(exception& e) { m->errorOut(e, "Tree", "parseTreeFile"); -- 2.39.2