From 2a7d1455e8cfe4f67a7173f3a7249762c5436217 Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 9 Jun 2010 12:07:11 +0000 Subject: [PATCH] changes to blastdb to make filenames given to blast unique, changes to split.abund, fixed bugs in chimera.slayer realigner, optimized splitmatrix and modified cluster.split command. --- blastdb.cpp | 21 +- chimerarealigner.cpp | 25 +- chimeraslayercommand.cpp | 2 - clustersplitcommand.cpp | 29 +- clustersplitcommand.h | 4 +- makefile | 4 +- maligner.cpp | 8 +- readtree.cpp | 16 +- splitabundcommand.cpp | 909 +++++++++++++++++++++++++++++---------- splitabundcommand.h | 20 +- splitmatrix.cpp | 516 ++++++++++++++-------- splitmatrix.h | 6 +- tree.cpp | 91 ++-- tree.h | 4 + 14 files changed, 1185 insertions(+), 470 deletions(-) diff --git a/blastdb.cpp b/blastdb.cpp index 35a19f4..760824d 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -54,21 +54,22 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { vector topMatches; ofstream queryFile; - openOutputFile(queryFileName, queryFile); + openOutputFile((queryFileName+seq->getName()), queryFile); queryFile << '>' << seq->getName() << endl; queryFile << seq->getUnaligned() << endl; queryFile.close(); + // the goal here is to quickly survey the database to find the closest match. To do this we are using the default // wordsize used in megablast. I'm sure we're sacrificing accuracy for speed, but anyother way would take way too // long. With this setting, it seems comparable in speed to the suffix tree approach. string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);; - blastCommand += (" -i " + queryFileName + " -o " + blastFileName); + blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName()); system(blastCommand.c_str()); ifstream m8FileHandle; - openInputFile(blastFileName, m8FileHandle); + openInputFile(blastFileName+seq->getName(), m8FileHandle, "no error"); string dummy; int templateAccession; @@ -84,7 +85,9 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { topMatches.push_back(templateAccession); } m8FileHandle.close(); - + remove((queryFileName+seq->getName()).c_str()); + remove((blastFileName+seq->getName()).c_str()); + return topMatches; } catch(exception& e) { @@ -100,7 +103,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n) { vector topMatches; ofstream queryFile; - openOutputFile(queryFileName, queryFile); + openOutputFile((queryFileName+seq->getName()), queryFile); queryFile << '>' << seq->getName() << endl; queryFile << seq->getUnaligned() << endl; queryFile.close(); @@ -110,11 +113,11 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n) { // long. With this setting, it seems comparable in speed to the suffix tree approach. string blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn - blastCommand += (" -i " + queryFileName + " -o " + blastFileName); + blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName()); system(blastCommand.c_str()); - + ifstream m8FileHandle; - openInputFile(blastFileName, m8FileHandle, "no error"); + openInputFile(blastFileName+seq->getName(), m8FileHandle, "no error"); string dummy; int templateAccession; @@ -131,6 +134,8 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n) { //cout << templateAccession << endl; } m8FileHandle.close(); + remove((queryFileName+seq->getName()).c_str()); + remove((blastFileName+seq->getName()).c_str()); //cout << "\n\n" ; return topMatches; } diff --git a/chimerarealigner.cpp b/chimerarealigner.cpp index 487e7ca..1da3278 100644 --- a/chimerarealigner.cpp +++ b/chimerarealigner.cpp @@ -24,19 +24,21 @@ void ChimeraReAligner::reAlign(Sequence* query, vector parents) { string qAligned = query->getAligned(); string newQuery = ""; - + //sort parents by region start sort(parents.begin(), parents.end(), compareRegionStart); //make sure you don't cutoff beginning of query if (parents[0].nastRegionStart > 0) { newQuery += qAligned.substr(0, parents[0].nastRegionStart); } int longest = 0; + //take query and break apart into pieces using breakpoints given by results of parents for (int i = 0; i < parents.size(); i++) { + int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1; string q = qAligned.substr(parents[i].nastRegionStart, length); Sequence* queryFrag = new Sequence(query->getName(), q); - + queryParts.push_back(queryFrag); Sequence* parent = getSequence(parents[i].parent); @@ -46,26 +48,35 @@ void ChimeraReAligner::reAlign(Sequence* query, vector parents) { parent->setAligned(p); parentParts.push_back(parent); - + if (q.length() > longest) { longest = q.length(); } if (p.length() > longest) { longest = p.length(); } } - + //align each peice to correct parent from results for (int i = 0; i < queryParts.size(); i++) { alignment = new NeedlemanOverlap(-2.0, match, misMatch, longest+1); //default gapopen, match, mismatch, longestbase Nast nast(alignment, queryParts[i], parentParts[i]); delete alignment; } - + //recombine pieces to form new query sequence for (int i = 0; i < queryParts.size(); i++) { + //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100. + //we don't want to loose length so in this case we will leave query alone + if (i != 0) { + int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1; + if (space > 0) { //they don't meet and we need to add query piece + string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space); + newQuery += q; + } + } + newQuery += queryParts[i]->getAligned(); } - //make sure you don't cutoff end of query - if (parents[parents.size()-1].nastRegionEnd < qAligned.length()) { newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd); } + if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) { newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1); } //set query to new aligned string query->setAligned(newQuery); diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index c41e026..8a0e287 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -292,7 +292,6 @@ int ChimeraSlayerCommand::execute(){ int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } - //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); @@ -377,7 +376,6 @@ int ChimeraSlayerCommand::execute(){ lines.push_back(new linePair(startPos, numSeqsPerProcessor)); } - createProcesses(outputFileName, fastafile, accnosFileName); rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index 861c4fb..c3faab7 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -27,7 +27,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { else { //valid paramters for this command - string Array[] = {"phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","showabund","timing","hard","processors","outputdir","inputdir"}; + string Array[] = {"phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -125,6 +125,9 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; } hard = isTrue(temp); + temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; } + large = isTrue(temp); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } convert(temp, processors); @@ -165,7 +168,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { void ClusterSplitCommand::help(){ try { - m->mothurOut("The cluster.split command parameter options are phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, processors. Phylip or column and name are required.\n"); + m->mothurOut("The cluster.split command parameter options are phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Phylip or column and name are required.\n"); m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n"); m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n"); m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n"); @@ -174,6 +177,7 @@ void ClusterSplitCommand::help(){ m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance or classify. \n"); m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n"); m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1. \n"); + m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n"); m->mothurOut("The cluster.split command should be in the following format: \n"); m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n"); m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n"); @@ -229,11 +233,12 @@ int ClusterSplitCommand::execute(){ if (m->control_pressed) { return 0; } time_t estart = time(NULL); + m->mothurOut("Splitting the distance file..."); m->mothurOutEndLine(); //split matrix into non-overlapping groups SplitMatrix* split; - if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod); } - else { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod); } + if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); } + else { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); } split->split(); @@ -284,6 +289,8 @@ int ClusterSplitCommand::execute(){ ifstream in; openInputFile(filename, in); + in >> tag; gobble(in); + while(!in.eof()) { string tempName; in >> tempName; gobble(in); @@ -299,7 +306,7 @@ int ClusterSplitCommand::execute(){ while(!in2.eof()) { string tempName; - in2 >> tempName; gobble(in); + in2 >> tempName; gobble(in2); if (labels.count(tempName) == 0) { labels.insert(tempName); } } in2.close(); @@ -312,7 +319,12 @@ int ClusterSplitCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; } + m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine(); + //****************** merge list file and create rabund and sabund files ******************************// + estart = time(NULL); + m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine(); + ListVector* listSingle; map labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins @@ -322,7 +334,7 @@ int ClusterSplitCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -337,7 +349,7 @@ int ClusterSplitCommand::execute(){ } } //********************************************************************************************************************** -map ClusterSplitCommand::completeListFile(vector listNames, string singleton, set userLabels, ListVector*& listSingle){ +map ClusterSplitCommand::completeListFile(vector listNames, string singleton, set& userLabels, ListVector*& listSingle){ try { map labelBin; @@ -367,7 +379,7 @@ map ClusterSplitCommand::completeListFile(vector listNames, if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) { convert(*it, temp); } else if (*it == "unique") { temp = -1.0; } - + orderFloat.push_back(temp); labelBin[temp] = numSingleBins; //initialize numbins } @@ -571,6 +583,7 @@ int ClusterSplitCommand::createProcesses(vector < vector < map > string filename = toString(getpid()) + ".temp"; ofstream out; openOutputFile(filename, out); + out << tag << endl; for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl; } out.close(); diff --git a/clustersplitcommand.h b/clustersplitcommand.h index 05ae8b8..c696329 100644 --- a/clustersplitcommand.h +++ b/clustersplitcommand.h @@ -35,7 +35,7 @@ private: string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile; double cutoff, splitcutoff; int precision, length, processors, taxLevelCutoff; - bool print_start, abort, hard; + bool print_start, abort, hard, large; time_t start; ofstream outList, outRabund, outSabund; @@ -43,7 +43,7 @@ private: int createProcesses(vector < vector < map > >); vector cluster(vector< map >, set&); int mergeLists(vector, map, ListVector*); - map completeListFile(vector, string, set, ListVector*&); + map completeListFile(vector, string, set&, ListVector*&); }; #endif diff --git a/makefile b/makefile index a973241..91a269c 100644 --- a/makefile +++ b/makefile @@ -1697,11 +1697,11 @@ install : mothur ./splitmatrix.o : splitmatrix.cpp $(CC) $(CC_OPTIONS) splitmatrix.cpp -c $(INCLUDE) -o ./splitmatrix.o -# Item # 207 -- splitmatrix -- +# Item # 207 -- clustersplit -- ./clustersplitcommand.o : clustersplitcommand.cpp $(CC) $(CC_OPTIONS) clustersplitcommand.cpp -c $(INCLUDE) -o ./clustersplitcommand.o -# Item # 207 -- splitmatrix -- +# Item # 208 -- classifyotu -- ./classifyotucommand.o : classifyotucommand.cpp $(CC) $(CC_OPTIONS) classifyotucommand.cpp -c $(INCLUDE) -o ./classifyotucommand.o diff --git a/maligner.cpp b/maligner.cpp index 5da4b9f..1d52589 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -502,13 +502,13 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence - Sequence* queryLeft = new Sequence(q->getName(), leftQuery); - Sequence* queryRight = new Sequence(q->getName(), rightQuery); + Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery); + Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery); vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1); vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1); - - //if ((tempIndexesRight.size() != (num+1)) || (tempIndexesLeft.size() != (num+1))) { m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to porcess sequence " + q->getName()); m->mothurOutEndLine(); return refResults; } + + //if ((tempIndexesRight.size() == 0) && (tempIndexesLeft.size() == 0)) { m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to process sequence " + q->getName()); m->mothurOutEndLine(); return refResults; } vector smaller; vector larger; diff --git a/readtree.cpp b/readtree.cpp index 0d25f7e..51f44e2 100644 --- a/readtree.cpp +++ b/readtree.cpp @@ -314,7 +314,21 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) { }else{ T->tree[n].setBranchLength(0.0); } - + + //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating + /* if(f.peek() == ','){ + + //force this node to be left child and read new rc + T->tree[n].setChildren(lc,rc); + T->tree[lc].setParent(n); + T->tree[rc].setParent(n); + + n++; + + + + }*/ + T->tree[n].setChildren(lc,rc); T->tree[lc].setParent(n); T->tree[rc].setParent(n); diff --git a/splitabundcommand.cpp b/splitabundcommand.cpp index cb57684..f86891b 100644 --- a/splitabundcommand.cpp +++ b/splitabundcommand.cpp @@ -13,8 +13,6 @@ SplitAbundCommand::SplitAbundCommand(string option) { try { abort = false; - wroteRareList = false; - wroteAbundList = false; allLines = 1; //allow user to run help @@ -22,7 +20,7 @@ SplitAbundCommand::SplitAbundCommand(string option) { else { //valid paramters for this command - string Array[] = {"list","name","group","label","accnos","cutoff","outputdir","inputdir"}; + string Array[] = {"name","group","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //"list", vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -57,6 +55,14 @@ SplitAbundCommand::SplitAbundCommand(string option) { if (path == "") { parameters["group"] = inputDir + it->second; } } + it = parameters.find("fasta"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fasta"] = inputDir + it->second; } + } + it = parameters.find("name"); //user has given a template file if(it != parameters.end()){ @@ -74,26 +80,42 @@ SplitAbundCommand::SplitAbundCommand(string option) { //check for required parameters listfile = validParameter.validFile(parameters, "list", true); if (listfile == "not open") { abort = true; } - else if (listfile == "not found") { listfile = ""; } + else if (listfile == "not found") { listfile = ""; } + else{ inputFile = listfile; } - //check for required parameters namefile = validParameter.validFile(parameters, "name", true); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } + else{ inputFile = namefile; } + + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not open") { abort = true; } + else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; } groupfile = validParameter.validFile(parameters, "group", true); - if (groupfile == "not open") { abort = true; } + if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { groupMap = new GroupMap(groupfile); int error = groupMap->readMap(); if (error == 1) { abort = true; } + } + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else if (groups == "all") { + if (groupfile != "") { Groups = groupMap->namesOfGroups; } + else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; } + }else { + splitAtDash(groups, Groups); + } + + if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); } + //do you have all files needed if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; } - if ((listfile != "") && (namefile != "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command, but NOT BOTH. "); m->mothurOutEndLine(); abort = true; } //check for optional parameter and set defaults // ...at some point should added some additional type checking... @@ -123,13 +145,16 @@ SplitAbundCommand::SplitAbundCommand(string option) { //********************************************************************************************************************** void SplitAbundCommand::help(){ try { - m->mothurOut("The split.abund command reads a list or a names file splits the sequences into rare and abundant groups.. \n"); - m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label and accnos.\n"); - m->mothurOut("The list or name parameter is required, and you must provide a cutoff value.\n"); + m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n"); + m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label, groups and accnos.\n"); + m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n"); m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n"); m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n"); m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n"); m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n"); + m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n"); + m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n"); + m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n"); m->mothurOut("The split.abund command should be used in the following format: split.abund(list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n"); m->mothurOut("Example: split.abundt(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n"); @@ -150,13 +175,25 @@ int SplitAbundCommand::execute(){ if (abort == true) { return 0; } - if (namefile != "") { split(); } - else { + if (listfile != "") { //you are using a listfile to determine abundance //remove old files so you can append later.... string fileroot = outputDir + getRootName(getSimpleName(listfile)); - remove((fileroot + "rare.list").c_str()); - remove((fileroot + "abund.list").c_str()); + if (Groups.size() == 0) { + remove((fileroot + "rare.list").c_str()); + remove((fileroot + "abund.list").c_str()); + + wroteListFile["rare"] = false; + wroteListFile["abund"] = false; + }else{ + for (int i=0; i processedLabels; @@ -166,6 +203,10 @@ int SplitAbundCommand::execute(){ list = input->getListVector(); string lastLabel = list->getLabel(); + //do you have a namefile or do we need to similate one? + if (namefile != "") { readNamesFile(); } + else { createNameMap(list); } + if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { @@ -175,7 +216,7 @@ int SplitAbundCommand::execute(){ if(allLines == 1 || labels.count(list->getLabel()) == 1){ m->mothurOut(list->getLabel()); m->mothurOutEndLine(); - split(list); + splitList(list); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); @@ -188,7 +229,7 @@ int SplitAbundCommand::execute(){ list = input->getListVector(lastLabel); //get new list vector to process m->mothurOut(list->getLabel()); m->mothurOutEndLine(); - split(list); + splitList(list); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); @@ -228,20 +269,36 @@ int SplitAbundCommand::execute(){ list = input->getListVector(lastLabel); //get new list vector to process m->mothurOut(list->getLabel()); m->mothurOutEndLine(); - split(list); + splitList(list); delete list; } delete input; + for (map::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) { + string filename = fileroot + itBool->first; + if (itBool->second) { //we wrote to this file + outputNames.push_back(filename); + }else{ + remove(filename.c_str()); + } + } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + + }else { //you are using the namefile to determine abundance + + splitNames(); + writeNames(); - if (wroteAbundList) { outputNames.push_back(fileroot + "abund.list"); } - if (wroteRareList) { outputNames.push_back(fileroot + "rare.list"); } + string tag = ""; + if (groupfile != "") { parseGroup(tag); } + if (accnos) { writeAccnos(tag); } + if (fastafile != "") { parseFasta(tag); } } - - + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -255,185 +312,217 @@ int SplitAbundCommand::execute(){ } } /**********************************************************************************************************************/ -int SplitAbundCommand::split(ListVector* thisList) { +int SplitAbundCommand::splitList(ListVector* thisList) { try { - - SAbundVector* sabund = new SAbundVector(); - *sabund = thisList->getSAbundVector(); + rareNames.clear(); + abundNames.clear(); - //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time - // and don't have to store the bins until you are done with the whole vector, this save alot of space. - int numRareBins = 0; - for (int i = 0; i <= sabund->getMaxRank(); i++) { - if (i > cutoff) { break; } - numRareBins += sabund->get(i); - } - int numAbundBins = thisList->getNumBins() - numRareBins; - delete sabund; + //get rareNames and abundNames + for (int i = 0; i < thisList->getNumBins(); i++) { + if (m->control_pressed) { return 0; } + + string bin = thisList->get(i); + + vector names; + splitAtComma(bin, names); //parses bin into individual sequence names + int size = names.size(); + + if (size <= cutoff) { + for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); } + }else{ + for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); } + } + }//end for + + writeList(thisList); + + string tag = thisList->getLabel() + "."; + if (groupfile != "") { parseGroup(tag); } + if (accnos) { writeAccnos(tag); } + if (fastafile != "") { parseFasta(tag); } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "splitList"); + exit(1); + } +} +/**********************************************************************************************************************/ +int SplitAbundCommand::writeList(ListVector* thisList) { + try { + + map filehandles; - //setup output files - ofstream outListAbund; - ofstream outListRare; - ofstream outGroupRare; - ofstream outGroupAbund; - ofstream outAccnosRare; - ofstream outAccnosAbund; + if (Groups.size() == 0) { + SAbundVector* sabund = new SAbundVector(); + *sabund = thisList->getSAbundVector(); - string fileroot = outputDir + getRootName(getSimpleName(listfile)); - if (numRareBins > 0) { - wroteRareList = true; - string listRareName = fileroot + "rare.list"; - openOutputFileAppend(listRareName, outListRare); - outListRare << thisList->getLabel() << '\t' << numRareBins << '\t'; + //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time + // and don't have to store the bins until you are done with the whole vector, this save alot of space. + int numRareBins = 0; + for (int i = 0; i <= sabund->getMaxRank(); i++) { + if (i > cutoff) { break; } + numRareBins += sabund->get(i); + } + int numAbundBins = thisList->getNumBins() - numRareBins; + delete sabund; + + ofstream aout; + ofstream rout; + + if (rareNames.size() != 0) { + string rare = outputDir + getRootName(getSimpleName(listfile)) + ".rare.list"; + wroteListFile["rare"] = true; + openOutputFileAppend(rare, rout); + rout << thisList->getLabel() << '\t' << numRareBins << '\t'; + } - if (accnos) { - string accnosName = fileroot + thisList->getLabel() + ".rare.accnos"; - openOutputFile(accnosName, outAccnosRare); - outputNames.push_back(accnosName); + if (abundNames.size() != 0) { + string abund = outputDir + getRootName(getSimpleName(listfile)) + ".abund.list"; + wroteListFile["abund"] = true; + openOutputFileAppend(abund, aout); + rout << thisList->getLabel() << '\t' << numAbundBins << '\t'; } + + for (int i = 0; i < thisList->getNumBins(); i++) { + if (m->control_pressed) { break; } + + string bin = list->get(i); - if (groupfile != "") { - string groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".rare.group"; - openOutputFile(groupFileName, outGroupRare); - outputNames.push_back(groupFileName); + int size = getNumNames(bin); + + if (size <= cutoff) { rout << bin << '\t'; } + else { aout << bin << '\t'; } } - } - - if (numAbundBins > 0) { - wroteAbundList = true; - string listAbundName = fileroot + "abund.list"; - openOutputFileAppend(listAbundName, outListAbund); - outListAbund << thisList->getLabel() << '\t' << numAbundBins << '\t'; - if (accnos) { - string accnosName = fileroot + thisList->getLabel() + ".abund.accnos"; - openOutputFile(accnosName, outAccnosAbund); - outputNames.push_back(accnosName); + if (rareNames.size() != 0) { rout << endl; rout.close(); } + if (abundNames.size() != 0) { aout << endl; aout.close(); } + + }else{ //parse names by abundance and group + string fileroot = outputDir + getRootName(getSimpleName(listfile)); + ofstream* temp; + ofstream* temp2; + map wroteFile; + map filehandles; + map::iterator it3; + + for (int i=0; igetLabel() + ".abund.group"; - openOutputFile(groupFileName, outGroupAbund); - outputNames.push_back(groupFileName); + map groupVector; + map::iterator itGroup; + map groupNumBins; + + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + groupNumBins[it3->first] = 0; + groupVector[it3->first] = ""; } - } - for (int i = 0; i < thisList->getNumBins(); i++) { - if (m->control_pressed) { break; } - - string bin = list->get(i); + for (int i = 0; i < thisList->getNumBins(); i++) { + if (m->control_pressed) { break; } - int size = getNumNames(bin); + map groupBins; + string bin = list->get(i); - if (size <= cutoff) { outListRare << bin << '\t'; } - else { outListAbund << bin << '\t'; } - - if ((groupfile != "") || (accnos)) { //you need to parse the bin... vector names; splitAtComma(bin, names); //parses bin into individual sequence names - + //parse bin into list of sequences in each group for (int j = 0; j < names.size(); j++) { - - //write to accnos file - if (accnos) { - if (size <= cutoff) { outAccnosRare << names[j] << endl; } - else { outAccnosAbund << names[j] << endl; } + string rareAbund; + if (rareNames.count(names[j]) != 0) { //you are a rare name + rareAbund = ".rare"; + }else{ //you are a abund name + rareAbund = ".abund"; } - //write to groupfile - if (groupfile != "") { - string group = groupMap->getGroup(names[j]); - - if (group == "not found") { //error in groupfile so close and remove output file and disregard groupfile - m->mothurOut(names[j] + " is not in your groupfile. disregarding groupfile."); m->mothurOutEndLine(); - delete groupMap; - if (numAbundBins > 0) { - outGroupAbund.close(); - remove((outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".abund.group").c_str()); - } - if (numRareBins > 0) { - outGroupRare.close(); - remove((outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".rare.group").c_str()); - } - groupfile = ""; - }else { - if (size <= cutoff) { outGroupRare << names[j] << '\t' << group << endl; } - else { outGroupAbund << names[j] << '\t' << group << endl; } + string group = groupMap->getGroup(names[j]); + + if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + itGroup = groupBins.find(group+rareAbund); + if(itGroup == groupBins.end()) { + groupBins[group+rareAbund] = names[j]; //add first name + groupNumBins[group+rareAbund]++; + }else{ //add another name + groupBins[group+rareAbund] += "," + names[j]; } + }else if(group == "not found") { + m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine(); } - - }//end for names - }//end if parse - }//end for list - - - //close files - if (numRareBins > 0) { - outListRare << endl; - outListRare.close(); - if (accnos) { outAccnosRare.close(); } - if (groupfile != "") { outGroupRare.close(); } + } + + + for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) { + groupVector[itGroup->first] += itGroup->second + '\t'; + } + } + + //end list vector + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group + wroteListFile[it3->first] = true; + (*(filehandles[it3->first])).close(); + delete it3->second; + } } - if (numAbundBins > 0) { - outListAbund << endl; - outListAbund.close(); - if (accnos) { outAccnosAbund.close(); } - if (groupfile != "") { outGroupAbund.close(); } - } - return 0; } catch(exception& e) { - m->errorOut(e, "SplitAbundCommand", "split"); + m->errorOut(e, "SplitAbundCommand", "writeList"); exit(1); } } /**********************************************************************************************************************/ -int SplitAbundCommand::split() { //namefile +int SplitAbundCommand::splitNames() { //namefile try { - //setup output files - ofstream outNameAbund; - ofstream outNameRare; - ofstream outGroupRare; - ofstream outGroupAbund; - ofstream outAccnosRare; - ofstream outAccnosAbund; - bool wroteNameAbund = false; - bool wroteNameRare = false; - bool wroteGroupRare = false; - bool wroteGroupAbund = false; - bool wroteAccnosRare = false; - bool wroteAccnosAbund = false; - - //prepare output files - string fileroot = outputDir + getRootName(getSimpleName(namefile)); - - string nameRareName = fileroot + "rare.names"; - openOutputFile(nameRareName, outNameRare); - string nameAbundName = fileroot + "abund.names"; - openOutputFile(nameAbundName, outNameAbund); + rareNames.clear(); + abundNames.clear(); - if (accnos) { - string accnosName = fileroot + "rare.accnos"; - openOutputFile(accnosName, outAccnosRare); + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (m->control_pressed) { break; } - accnosName = fileroot + "abund.accnos"; - openOutputFile(accnosName, outAccnosAbund); - } + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); - if (groupfile != "") { - string groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group"; - openOutputFile(groupFileName, outGroupRare); + nameMap[firstCol] = secondCol; - groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group"; - openOutputFile(groupFileName, outGroupAbund); + int size = getNumNames(secondCol); + + if (size <= cutoff) { + rareNames.insert(firstCol); + }else{ + abundNames.insert(firstCol); + } } - - + in.close(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "splitNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +int SplitAbundCommand::readNamesFile() { + try { //open input file ifstream in; openInputFile(namefile, in); @@ -444,86 +533,470 @@ int SplitAbundCommand::split() { //namefile string firstCol, secondCol; in >> firstCol >> secondCol; gobble(in); - int size = getNumNames(secondCol); + nameMap[firstCol] = secondCol; + } + in.close(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "readNamesFile"); + exit(1); + } +} +/**********************************************************************************************************************/ +int SplitAbundCommand::createNameMap(ListVector* thisList) { + try { + + if (thisList != NULL) { + for (int i = 0; i < thisList->getNumBins(); i++) { + if (m->control_pressed) { return 0; } + + string bin = thisList->get(i); + + vector names; + splitAtComma(bin, names); //parses bin into individual sequence names - if (size <= cutoff) { outNameRare << firstCol << '\t' << secondCol << endl; wroteNameRare = true; } - else { outNameAbund << firstCol << '\t' << secondCol << endl; wroteNameAbund = true; } + for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; } + }//end for + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "createNameMap"); + exit(1); + } +} +/**********************************************************************************************************************/ +int SplitAbundCommand::writeNames() { //namefile + try { + + map filehandles; + + if (Groups.size() == 0) { + ofstream aout; + ofstream rout; + + if (rareNames.size() != 0) { + string rare = outputDir + getRootName(getSimpleName(namefile)) + "rare.names"; + openOutputFile(rare, rout); + outputNames.push_back(rare); + + for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { + rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl; + } + rout.close(); + } + + if (abundNames.size() != 0) { + string abund = outputDir + getRootName(getSimpleName(namefile)) + "abund.names"; + openOutputFile(abund, aout); + outputNames.push_back(abund); + + for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { + aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl; + } + aout.close(); + } + + }else{ //parse names by abundance and group + string fileroot = outputDir + getRootName(getSimpleName(namefile)); + ofstream* temp; + ofstream* temp2; + map wroteFile; + map filehandles; + map::iterator it3; + for (int i=0; i::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) { vector names; - splitAtComma(secondCol, names); //parses bin into individual sequence names + splitAtComma(itName->second, names); //parses bin into individual sequence names - //parse bin into list of sequences in each group - for (int j = 0; j < names.size(); j++) { + string rareAbund; + if (rareNames.count(itName->first) != 0) { //you are a rare name + rareAbund = ".rare"; + }else{ //you are a abund name + rareAbund = ".abund"; + } + + map outputStrings; + map::iterator itout; + for (int i = 0; i < names.size(); i++) { - //write to accnos file - if (accnos) { - if (size <= cutoff) { outAccnosRare << names[j] << endl; wroteAccnosRare = true; } - else { outAccnosAbund << names[j] << endl; wroteAccnosAbund = true; } + string group = groupMap->getGroup(names[i]); + + if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + itout = outputStrings.find(group+rareAbund); + if (itout == outputStrings.end()) { + outputStrings[group+rareAbund] = names[i] + '\t' + names[i]; + }else { outputStrings[group+rareAbund] += "," + names[i]; } + }else if(group == "not found") { + m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine(); } + } + + for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { + *(filehandles[itout->first]) << itout->second << endl; + wroteFile[itout->first] = true; + } + } + + + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(filehandles[it3->first])).close(); + if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + it3->first + ".names"); } + else { remove((it3->first).c_str()); } + delete it3->second; + } + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "writeNames"); + exit(1); + } +} +/**********************************************************************************************************************/ +//just write the unique names - if a namesfile is given +int SplitAbundCommand::writeAccnos(string tag) { + try { + + map filehandles; + + if (Groups.size() == 0) { + ofstream aout; + ofstream rout; + + if (rareNames.size() != 0) { + string rare = outputDir + getRootName(getSimpleName(inputFile)) + tag + "rare.accnos"; + openOutputFile(rare, rout); + outputNames.push_back(rare); + + for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { + rout << (*itRare) << endl; + } + rout.close(); + } + + if (abundNames.size() != 0) { + string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag + "abund.accnos"; + openOutputFile(abund, aout); + outputNames.push_back(abund); + + for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { + aout << (*itAbund) << endl; + } + aout.close(); + } + }else{ //parse names by abundance and group + string fileroot = outputDir + getRootName(getSimpleName(inputFile)); + ofstream* temp; + ofstream* temp2; + map wroteFile; + map filehandles; + map::iterator it3; + + for (int i=0; i::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { + string group = groupMap->getGroup(*itRare); - //write to groupfile - if (groupfile != "") { - string group = groupMap->getGroup(names[j]); + if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + *(filehandles[group+".rare"]) << *itRare << endl; + wroteFile[group+".rare"] = true; + } + } + + //write abund + for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { + string group = groupMap->getGroup(*itAbund); - if (group == "not found") { //error in groupfile so close and remove output file and disregard groupfile - m->mothurOut(names[j] + " is not in your groupfile. disregarding groupfile."); m->mothurOutEndLine(); - delete groupMap; - - outGroupAbund.close(); - remove((outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group").c_str()); - outGroupRare.close(); - remove((outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group").c_str()); - - groupfile = ""; - wroteGroupRare = false; - wroteGroupAbund = false; - }else { - if (size <= cutoff) { outGroupRare << names[j] << '\t' << group << endl; wroteGroupRare = true; } - else { outGroupAbund << names[j] << '\t' << group << endl; wroteGroupAbund = true; } + if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + *(filehandles[group+".abund"]) << *itAbund << endl; + wroteFile[group+".abund"] = true; + } + } + + //close files + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(filehandles[it3->first])).close(); + if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".accnos"); } + else { remove((fileroot + tag + it3->first + ".accnos").c_str()); } + delete it3->second; + } + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "writeAccnos"); + exit(1); + } +} +/**********************************************************************************************************************/ +int SplitAbundCommand::parseGroup(string tag) { //namefile + try { + + map filehandles; + + if (Groups.size() == 0) { + ofstream aout; + ofstream rout; + + if (rareNames.size() != 0) { + string rare = outputDir + getRootName(getSimpleName(groupfile)) + tag + "rare.groups"; + openOutputFile(rare, rout); + outputNames.push_back(rare); + } + + if (abundNames.size() != 0) { + string abund = outputDir + getRootName(getSimpleName(groupfile)) + tag + "abund.groups"; + openOutputFile(abund, aout); + outputNames.push_back(abund); + } + + + for (map::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) { + vector names; + splitAtComma(itName->second, names); //parses bin into individual sequence names + + for (int i = 0; i < names.size(); i++) { + + string group = groupMap->getGroup(names[i]); + + if (group == "not found") { + m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine(); + }else { + if (rareNames.count(itName->first) != 0) { //you are a rare name + rout << names[i] << '\t' << group << endl; + }else{ //you are a abund name + rout << names[i] << '\t' << group << endl; } } - - }//end for names - }//end if parse - }//end while + } + } + + if (rareNames.size() != 0) { rout.close(); } + if (abundNames.size() != 0) { aout.close(); } + + }else{ //parse names by abundance and group + string fileroot = outputDir + getRootName(getSimpleName(groupfile)); + ofstream* temp; + ofstream* temp2; + map wroteFile; + map filehandles; + map::iterator it3; + + for (int i=0; i::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) { + vector names; + splitAtComma(itName->second, names); //parses bin into individual sequence names + + string rareAbund; + if (rareNames.count(itName->first) != 0) { //you are a rare name + rareAbund = ".rare"; + }else{ //you are a abund name + rareAbund = ".abund"; + } + + for (int i = 0; i < names.size(); i++) { + + string group = groupMap->getGroup(names[i]); + + if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl; + wroteFile[group+rareAbund] = true; + } + } + } + + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(filehandles[it3->first])).close(); + if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".groups"); } + else { remove((fileroot + tag + it3->first + ".groups").c_str()); } + delete it3->second; + } + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "parseGroups"); + exit(1); + } +} +/**********************************************************************************************************************/ +int SplitAbundCommand::parseFasta(string tag) { //namefile + try { + map filehandles; - //close files - in.close(); - outNameRare.close(); - outNameAbund.close(); - if (!wroteNameRare) { remove((fileroot + "rare.names").c_str()); } - else { outputNames.push_back((fileroot + "rare.names")); } - if (!wroteNameAbund) { remove((fileroot + "abund.names").c_str()); } - else { outputNames.push_back((fileroot + "abund.names")); } + if (Groups.size() == 0) { + ofstream aout; + ofstream rout; + + if (rareNames.size() != 0) { + string rare = outputDir + getRootName(getSimpleName(fastafile)) + tag + "rare.fasta"; + openOutputFile(rare, rout); + outputNames.push_back(rare); + } + + if (abundNames.size() != 0) { + string abund = outputDir + getRootName(getSimpleName(fastafile)) + tag + "abund.fasta"; + openOutputFile(abund, aout); + outputNames.push_back(abund); + } + + + //open input file + ifstream in; + openInputFile(fastafile, in); + + while (!in.eof()) { + if (m->control_pressed) { break; } - if (groupfile != "") { - outGroupRare.close(); outGroupAbund.close(); - if (!wroteGroupRare) { remove((outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group").c_str()); } - else { outputNames.push_back((outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group")); } - if (!wroteGroupAbund) { remove((outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group").c_str()); } - else { outputNames.push_back((outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group")); } - } + Sequence seq(in); gobble(in); + + if (seq.getName() != "") { + + map::iterator itNames; + + itNames = nameMap.find(seq.getName()); + + if (itNames == nameMap.end()) { + m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine(); + }else{ + if (rareNames.count(seq.getName()) != 0) { //you are a rare name + seq.printSequence(rout); + }else{ //you are a abund name + seq.printSequence(aout); + } + } + } + } + in.close(); + if (rareNames.size() != 0) { rout.close(); } + if (abundNames.size() != 0) { aout.close(); } + + }else{ //parse names by abundance and group + string fileroot = outputDir + getRootName(getSimpleName(fastafile)); + ofstream* temp; + ofstream* temp2; + map wroteFile; + map filehandles; + map::iterator it3; + + for (int i=0; icontrol_pressed) { break; } + + Sequence seq(in); gobble(in); + + if (seq.getName() != "") { + map::iterator itNames = nameMap.find(seq.getName()); + + if (itNames == nameMap.end()) { + m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine(); + }else{ + vector names; + splitAtComma(itNames->second, names); //parses bin into individual sequence names + + string rareAbund; + if (rareNames.count(itNames->first) != 0) { //you are a rare name + rareAbund = ".rare"; + }else{ //you are a abund name + rareAbund = ".abund"; + } + + for (int i = 0; i < names.size(); i++) { + + string group = groupMap->getGroup(seq.getName()); + + if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + seq.printSequence(*(filehandles[group+rareAbund])); + wroteFile[group+rareAbund] = true; + }else if(group == "not found") { + m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine(); + } + } + } + } + } + in.close(); + + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(filehandles[it3->first])).close(); + if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".fasta"); } + else { remove((fileroot + tag + it3->first + ".fasta").c_str()); } + delete it3->second; + } } - + return 0; } catch(exception& e) { - m->errorOut(e, "SplitAbundCommand", "split"); + m->errorOut(e, "SplitAbundCommand", "parseFasta"); exit(1); } } - /**********************************************************************************************************************/ - diff --git a/splitabundcommand.h b/splitabundcommand.h index 8d2ab2e..fb88380 100644 --- a/splitabundcommand.h +++ b/splitabundcommand.h @@ -34,18 +34,28 @@ public: private: - int split(ListVector*); - int split(); //namefile + int splitList(ListVector*); + int splitNames(); //namefile + int writeNames(); + int writeList(ListVector*); + int writeAccnos(string); + int parseGroup(string); + int parseFasta(string); + int readNamesFile(); //namefile + int createNameMap(ListVector*); vector outputNames; ListVector* list; GroupMap* groupMap; InputData* input; - string outputDir, listfile, namefile, groupfile, label; - set labels; - bool abort, allLines, accnos, wroteRareList, wroteAbundList; + string outputDir, listfile, namefile, groupfile, label, groups, fastafile, inputFile; + set labels, rareNames, abundNames; + vector Groups; + bool abort, allLines, accnos; int cutoff; + map wroteListFile; + map nameMap; diff --git a/splitmatrix.cpp b/splitmatrix.cpp index 718c8a3..0929cb0 100644 --- a/splitmatrix.cpp +++ b/splitmatrix.cpp @@ -12,13 +12,14 @@ /***********************************************************************/ -SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, string t){ +SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, string t, bool l){ m = MothurOut::getInstance(); distFile = distfile; cutoff = c; namefile = name; method = t; taxFile = tax; + large = l; } /***********************************************************************/ @@ -48,10 +49,185 @@ int SplitMatrix::split(){ int SplitMatrix::splitDistance(){ try { - vector > groups; + if (large) { splitDistanceLarge(); } + else { splitDistanceRAM(); } + + } + catch(exception& e) { + m->errorOut(e, "SplitMatrix", "splitDistance"); + exit(1); + } +} + +/***********************************************************************/ +int SplitMatrix::splitClassify(){ + try { + cutoff = int(cutoff); + + map seqGroup; + map::iterator it; + map::iterator it2; + + int numGroups = 0; + + //build tree from users taxonomy file + PhyloTree* phylo = new PhyloTree(); + + ifstream in; + openInputFile(taxFile, in); + + //read in users taxonomy file and add sequences to tree + string seqname, tax; + while(!in.eof()){ + in >> seqname >> tax; gobble(in); + + phylo->addSeqToTree(seqname, tax); + } + in.close(); + + phylo->assignHeirarchyIDs(0); + + //make sure the cutoff is not greater than maxlevel + if (cutoff > phylo->getMaxLevel()) { m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel())); m->mothurOutEndLine(); cutoff = phylo->getMaxLevel(); } + + //for each node in tree + for (int i = 0; i < phylo->getNumNodes(); i++) { + + //is this node within the cutoff + TaxNode taxon = phylo->get(i); + + if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences + if (taxon.accessions.size() > 1) { //if this taxon just has one seq its a singleton + for (int j = 0; j < taxon.accessions.size(); j++) { + seqGroup[taxon.accessions[j]] = numGroups; + } + numGroups++; + } + } + } + + ifstream dFile; + openInputFile(distFile, dFile); + ofstream outFile; + + for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case + remove((distFile + "." + toString(i) + ".temp").c_str()); + } + //for buffering the io to improve speed //allow for 10 dists to be stored, then output. + vector outputs; outputs.resize(numGroups, ""); + vector numOutputs; numOutputs.resize(numGroups, 0); + + //for each distance + while(dFile){ + string seqA, seqB; + float dist; + + if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str()); } } + + dFile >> seqA >> seqB >> dist; gobble(dFile); + + //if both sequences are in the same group then they are within the cutoff + it = seqGroup.find(seqA); + it2 = seqGroup.find(seqB); + + if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons + if (it->second == it2->second) { //they are from the same group so add the distance + if (numOutputs[it->second] > 10) { + openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile); + outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl; + outFile.close(); + outputs[it->second] = ""; + numOutputs[it->second] = 0; + }else{ + outputs[it->second] += seqA + '\t' + seqB + '\t' + toString(dist) + '\n'; + numOutputs[it->second]++; + } + } + } + } + dFile.close(); + + for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case + remove((namefile + "." + toString(i) + ".temp").c_str()); + + //write out any remaining buffers + if (numOutputs[it->second] > 0) { + openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile); + outFile << outputs[i]; + outFile.close(); + outputs[i] = ""; + numOutputs[i] = 0; + } + } + + ifstream bigNameFile; + openInputFile(namefile, bigNameFile); + + singleton = namefile + ".extra.temp"; + ofstream remainingNames; + openOutputFile(singleton, remainingNames); + + bool wroteExtra = false; + + string name, nameList; + while(!bigNameFile.eof()){ + bigNameFile >> name >> nameList; gobble(bigNameFile); + + //did this sequence get assigned a group + it = seqGroup.find(name); + + if (it != seqGroup.end()) { + openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile); + outFile << name << '\t' << nameList << endl; + outFile.close(); + }else{ + wroteExtra = true; + remainingNames << name << '\t' << nameList << endl; + } + } + bigNameFile.close(); + remainingNames.close(); + + if (!wroteExtra) { + remove(singleton.c_str()); + singleton = "none"; + } + + for(int i=0;i temp; + temp[tempDistFile] = tempNameFile; + dists.push_back(temp); + } + + if (m->control_pressed) { + for (int i = 0; i < dists.size(); i++) { + remove((dists[i].begin()->first).c_str()); + remove((dists[i].begin()->second).c_str()); + } + dists.clear(); + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SplitMatrix", "splitClassify"); + exit(1); + } +} +/***********************************************************************/ +int SplitMatrix::splitDistanceLarge(){ + try { + vector > groups; + + //for buffering the io to improve speed + //allow for 30 dists to be stored, then output. vector outputs; vector numOutputs; vector wroteOutPut; @@ -68,14 +244,13 @@ int SplitMatrix::splitDistance(){ dFile >> seqA >> seqB >> dist; - if (m->control_pressed) { outFile.close(); dFile.close(); for(int i=0;i 0){ remove((distFile + "." + toString(i) + ".temp").c_str()); } } return 0; } + if (m->control_pressed) { dFile.close(); for(int i=0;i 0){ remove((distFile + "." + toString(i) + ".temp").c_str()); } } return 0; } if(dist < cutoff){ //cout << "in cutoff: " << dist << endl; int groupIDA = -1; int groupIDB = -1; int groupID = -1; - int prevGroupID = -1; for(int i=0;i::iterator aIt = groups[i].find(seqA); @@ -123,10 +298,6 @@ int SplitMatrix::splitDistance(){ newGroup.insert(seqB); groups.push_back(newGroup); - outFile.close(); - string fileName = distFile + "." + toString(numGroups) + ".temp"; - outFile.open(fileName.c_str(), ios::ate); - string tempOut = seqA + '\t' + seqB + '\t' + toString(dist) + '\n'; outputs.push_back(tempOut); numOutputs.push_back(1); @@ -136,16 +307,13 @@ int SplitMatrix::splitDistance(){ } else{ string fileName = distFile + "." + toString(groupID) + ".temp"; - - if(groupID != prevGroupID){ - outFile.close(); - outFile.open(fileName.c_str(), ios::app); - prevGroupID = groupID; - } - + //have we reached the max buffer size - if (numOutputs[groupID] > 10) { //write out sequence + if (numOutputs[groupID] > 60) { //write out sequence + outFile.open(fileName.c_str(), ios::app); outFile << outputs[groupID] << seqA << '\t' << seqB << '\t' << dist << endl; + outFile.close(); + outputs[groupID] = ""; numOutputs[groupID] = 0; wroteOutPut[groupID] = true; @@ -157,13 +325,20 @@ int SplitMatrix::splitDistance(){ if(groupIDA != -1 && groupIDB != -1){ //merge distance files of two groups you merged above string row, column, distance; if(groupIDA 60) { + outFile << outputs[groupID]; + outputs[groupID] = ""; + numOutputs[groupID] = 0; + } + + outFile.close(); wroteOutPut[groupID] = true; wroteOutPut[groupIDB] = false; - } - - if (numOutputs[groupID] != 0) { - outFile << outputs[groupID]; - wroteOutPut[groupID] = true; - outputs[groupID] = ""; - numOutputs[groupID] = 0; - - outputs[groupIDB] = ""; - numOutputs[groupIDB] = 0; - } - + }else{ } //just merge b's memory with a's memory } else{ numOutputs[groupID] += numOutputs[groupIDA]; outputs[groupID] += outputs[groupIDA]; + outputs[groupIDA] = ""; + numOutputs[groupIDA] = 0; + if (wroteOutPut[groupIDA]) { - string fileName = distFile + "." + toString(groupIDA) + ".temp"; - ifstream fileB(fileName.c_str(), ios::ate); + string fileName2 = distFile + "." + toString(groupIDA) + ".temp"; + ifstream fileB(fileName2.c_str(), ios::ate); + + outFile.open(fileName.c_str(), ios::app); long size; char* memblock; @@ -254,29 +432,26 @@ int SplitMatrix::splitDistance(){ delete memblock; fileB.close(); - remove(fileName.c_str()); + remove(fileName2.c_str()); + + //write out the merged memory + if (numOutputs[groupID] > 60) { + outFile << outputs[groupID]; + outputs[groupID] = ""; + numOutputs[groupID] = 0; + } + + outFile.close(); wroteOutPut[groupID] = true; wroteOutPut[groupIDA] = false; - } - - if (numOutputs[groupID] != 0) { - outFile << outputs[groupID]; - wroteOutPut[groupID] = true; - outputs[groupID] = ""; - numOutputs[groupID] = 0; - - outputs[groupIDA] = ""; - numOutputs[groupIDA] = 0; - } - + }else { } //just merge memory } } } } gobble(dFile); } - outFile.close(); dFile.close(); for (int i = 0; i < numGroups; i++) { @@ -287,6 +462,20 @@ int SplitMatrix::splitDistance(){ outFile.close(); } } + + splitNames(groups); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SplitMatrix", "splitDistanceLarge"); + exit(1); + } +} +//******************************************************************************************************************** +int SplitMatrix::splitNames(vector >& groups){ + try { + int numGroups = groups.size(); ifstream bigNameFile(namefile.c_str()); if(!bigNameFile){ @@ -353,173 +542,124 @@ int SplitMatrix::splitDistance(){ } return 0; - } catch(exception& e) { - m->errorOut(e, "SplitMatrix", "splitDistance"); + m->errorOut(e, "SplitMatrix", "splitNames"); exit(1); } } - -/***********************************************************************/ -int SplitMatrix::splitClassify(){ +//******************************************************************************************************************** +int SplitMatrix::splitDistanceRAM(){ try { - cutoff = int(cutoff); - - map seqGroup; - map::iterator it; - map::iterator it2; + vector > groups; + vector outputs; int numGroups = 0; - - //build tree from users taxonomy file - PhyloTree* phylo = new PhyloTree(); - - ifstream in; - openInputFile(taxFile, in); - - //read in users taxonomy file and add sequences to tree - string seqname, tax; - while(!in.eof()){ - in >> seqname >> tax; gobble(in); - - phylo->addSeqToTree(seqname, tax); - } - in.close(); - - phylo->assignHeirarchyIDs(0); - - //make sure the cutoff is not greater than maxlevel - if (cutoff > phylo->getMaxLevel()) { m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel())); m->mothurOutEndLine(); cutoff = phylo->getMaxLevel(); } - - //for each node in tree - for (int i = 0; i < phylo->getNumNodes(); i++) { - - //is this node within the cutoff - TaxNode taxon = phylo->get(i); - - if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences - if (taxon.accessions.size() > 1) { //if this taxon just has one seq its a singleton - for (int j = 0; j < taxon.accessions.size(); j++) { - seqGroup[taxon.accessions[j]] = numGroups; - } - numGroups++; - } - } - } ifstream dFile; openInputFile(distFile, dFile); - ofstream outFile; - - for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case - remove((distFile + "." + toString(i) + ".temp").c_str()); - } - - - //for buffering the io to improve speed - //allow for 10 dists to be stored, then output. - vector outputs; outputs.resize(numGroups, ""); - vector numOutputs; numOutputs.resize(numGroups, 0); - - //for each distance + while(dFile){ string seqA, seqB; float dist; + + dFile >> seqA >> seqB >> dist; - if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str()); } } - - dFile >> seqA >> seqB >> dist; gobble(dFile); - - //if both sequences are in the same group then they are within the cutoff - it = seqGroup.find(seqA); - it2 = seqGroup.find(seqB); - - if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons - if (it->second == it2->second) { //they are from the same group so add the distance - if (numOutputs[it->second] > 10) { - openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile); - outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl; - outFile.close(); - outputs[it->second] = ""; - numOutputs[it->second] = 0; - }else{ - outputs[it->second] += seqA + '\t' + seqB + '\t' + toString(dist) + '\n'; - numOutputs[it->second]++; + if (m->control_pressed) { dFile.close(); for(int i=0;i 0){ remove((distFile + "." + toString(i) + ".temp").c_str()); } } return 0; } + + if(dist < cutoff){ + //cout << "in cutoff: " << dist << endl; + int groupIDA = -1; + int groupIDB = -1; + int groupID = -1; + + for(int i=0;i::iterator aIt = groups[i].find(seqA); + set::iterator bIt = groups[i].find(seqB); + + if(groupIDA == -1 && aIt != groups[i].end()){//seqA is not already assigned to a group and is in group[i], so assign seqB to group[i] + groups[i].insert(seqB); + groupIDA = i; + groupID = groupIDA; + + //cout << "in aIt: " << groupID << endl; + // break; + } + else if(groupIDB == -1 && bIt != groups[i].end()){//seqB is not already assigned to a group and is in group[i], so assign seqA to group[i] + groups[i].insert(seqA); + groupIDB = i; + groupID = groupIDB; + + // cout << "in bIt: " << groupID << endl; + // break; + } + + if(groupIDA != -1 && groupIDB != -1){//both ifs above have been executed, so we need to decide who to assign them to + if(groupIDA < groupIDB){ + // cout << "A: " << groupIDA << "\t" << groupIDB << endl; + groups[groupIDA].insert(groups[groupIDB].begin(), groups[groupIDB].end()); //merge two groups into groupIDA + groups[groupIDB].clear(); + groupID = groupIDA; + } + else{ + // cout << "B: " << groupIDA << "\t" << groupIDB << endl; + groups[groupIDB].insert(groups[groupIDA].begin(), groups[groupIDA].end()); //merge two groups into groupIDB + groups[groupIDA].clear(); + groupID = groupIDB; + } + break; + } + } + + //windows is gonna gag on the reuse of outFile, will need to make it local... + + if(groupIDA == -1 && groupIDB == -1){ //we need a new group + set newGroup; + newGroup.insert(seqA); + newGroup.insert(seqB); + groups.push_back(newGroup); + + string tempOut = seqA + '\t' + seqB + '\t' + toString(dist) + '\n'; + outputs.push_back(tempOut); + numGroups++; + } + else{ + + outputs[groupID] += seqA + '\t' + seqB + '\t' + toString(dist) + '\n'; + + if(groupIDA != -1 && groupIDB != -1){ //merge distance files of two groups you merged above + string row, column, distance; + if(groupIDAsecond] > 0) { - openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile); - outFile << outputs[i]; - outFile.close(); - outputs[i] = ""; - numOutputs[i] = 0; - } - } - ifstream bigNameFile; - openInputFile(namefile, bigNameFile); - - singleton = namefile + ".extra.temp"; - ofstream remainingNames; - openOutputFile(singleton, remainingNames); - - bool wroteExtra = false; - - string name, nameList; - while(!bigNameFile.eof()){ - bigNameFile >> name >> nameList; gobble(bigNameFile); - - //did this sequence get assigned a group - it = seqGroup.find(name); - - if (it != seqGroup.end()) { - openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile); - outFile << name << '\t' << nameList << endl; + for (int i = 0; i < numGroups; i++) { + if (outputs[i] != "") { + ofstream outFile; + string fileName = distFile + "." + toString(i) + ".temp"; + outFile.open(fileName.c_str(), ios::ate); + outFile << outputs[i]; outFile.close(); - }else{ - wroteExtra = true; - remainingNames << name << '\t' << nameList << endl; } } - bigNameFile.close(); - remainingNames.close(); - - if (!wroteExtra) { - remove(singleton.c_str()); - singleton = "none"; - } - - for(int i=0;i temp; - temp[tempDistFile] = tempNameFile; - dists.push_back(temp); - } - - if (m->control_pressed) { - for (int i = 0; i < dists.size(); i++) { - remove((dists[i].begin()->first).c_str()); - remove((dists[i].begin()->second).c_str()); - } - dists.clear(); - } - - return 0; - + return 0; } catch(exception& e) { - m->errorOut(e, "SplitMatrix", "splitClassify"); + m->errorOut(e, "SplitMatrix", "splitDistanceRAM"); exit(1); } } diff --git a/splitmatrix.h b/splitmatrix.h index e38f8fc..95fe0dc 100644 --- a/splitmatrix.h +++ b/splitmatrix.h @@ -19,7 +19,7 @@ class SplitMatrix { public: - SplitMatrix(string, string, string, float, string); //column formatted distance file, namesfile, cutoff, method + SplitMatrix(string, string, string, float, string, bool); //column formatted distance file, namesfile, cutoff, method ~SplitMatrix(); int split(); vector< map > getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size @@ -31,9 +31,13 @@ class SplitMatrix { string distFile, namefile, singleton, method, taxFile; vector< map< string, string> > dists; float cutoff; + bool large; int splitDistance(); int splitClassify(); + int splitDistanceLarge(); + int splitDistanceRAM(); + int splitNames(vector >& groups); }; /******************************************************/ diff --git a/tree.cpp b/tree.cpp index 8e45981..400a72a 100644 --- a/tree.cpp +++ b/tree.cpp @@ -736,39 +736,20 @@ int Tree::readTreeString(ifstream& filehandle) { //cout << " at beginning of while " << k << endl; if(c == ')') { //to pass over labels in trees - c=filehandle.get(); - while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); } - filehandle.putback(c); + string label = readLabel(filehandle); } + if(c == ';') { return 0; } if(c == -1) { return 0; } + //if you are a name if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space - name = ""; - c = filehandle.get(); - //k = c; -//cout << k << endl; - while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { - name += c; - c = filehandle.get(); - //k = c; -//cout << " in name while " << k << endl; - } - -//cout << "name = " << name << endl; + name = readName(filehandle); globaldata->Treenames.push_back(name); - filehandle.putback(c); -//k = c; -//cout << " after putback" << k << endl; } if(c == ':') { //read until you reach the end of the branch length - while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { - c = filehandle.get(); - //k = c; - //cout << " in branch while " << k << endl; - } - filehandle.putback(c); + string bl = readBranchLength(filehandle); } c = filehandle.get(); @@ -789,6 +770,68 @@ int Tree::readTreeString(ifstream& filehandle) { } /*******************************************************/ +string Tree::readLabel(ifstream& filehandle) { + try { + + string label = ""; + + //to pass over labels in trees + int c=filehandle.get(); + while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ label += c; c=filehandle.get(); } + filehandle.putback(c); + + return label; + + } + catch(exception& e) { + m->errorOut(e, "Tree", "readLabel"); + exit(1); + } +} +/*******************************************************/ +string Tree::readName(ifstream& filehandle) { + try { + + string name = ""; + int c = filehandle.get(); + + while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { + name += c; + c = filehandle.get(); + } + +//cout << "name = " << name << endl; + filehandle.putback(c); + + return name; + + } + catch(exception& e) { + m->errorOut(e, "Tree", "readName"); + exit(1); + } +} +/*******************************************************/ +string Tree::readBranchLength(ifstream& filehandle) { + try { + + string br = ""; + int c; + while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { + br += c; + c = filehandle.get(); + } + filehandle.putback(c); + + return br; + + } + catch(exception& e) { + m->errorOut(e, "Tree", "readBranchLength"); + exit(1); + } +} /*******************************************************/ +/*******************************************************/ diff --git a/tree.h b/tree.h index 23ddf5d..99cd68f 100644 --- a/tree.h +++ b/tree.h @@ -62,6 +62,10 @@ private: //not included in the tree. //only takes names from the first tree in the tree file and assumes that all trees use the same names. int readTreeString(ifstream&); + string readLabel(ifstream&); + string readName(ifstream&); + string readBranchLength(ifstream&); + MothurOut* m; }; -- 2.39.2