From: Sarah Westcott Date: Tue, 30 Oct 2012 18:04:45 +0000 (-0400) Subject: classify.seqs allows sequences to be in taxonomy file that are not in template. ... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=9b53f130ac9af5e95444ce2e817fce25ed19ff03 classify.seqs allows sequences to be in taxonomy file that are not in template. it ignores them. fixed windows bug in clearcut char* declarations. get.lineage and remove.lineage can now handle taxons with (). list vector sorts list file outs by abundance. bugs while testing --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index ecb0619..df58a36 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -367,8 +367,8 @@ 8DD76FAF0486AB0100D96B5E /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 8; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; + dstPath = Users/SarahsWork/desktop/debug; + dstSubfolderSpec = 16; files = ( 8DD76FB00486AB0100D96B5E /* mothur.1 in CopyFiles */, ); @@ -1530,8 +1530,6 @@ A7E9B67112D37EC400DA6239 /* canberra.h */, A7E9B67612D37EC400DA6239 /* chao1.cpp */, A7E9B67712D37EC400DA6239 /* chao1.h */, - A7E9B6A412D37EC400DA6239 /* cmdargs.cpp */, - A7E9B6A512D37EC400DA6239 /* cmdargs.h */, A7E9B6BB12D37EC400DA6239 /* coverage.cpp */, A7E9B6BC12D37EC400DA6239 /* coverage.h */, A7E9B6CA12D37EC400DA6239 /* dist.h */, @@ -1821,6 +1819,8 @@ children = ( A7E9B69412D37EC400DA6239 /* clearcut.cpp */, A7E9B69512D37EC400DA6239 /* clearcut.h */, + A7E9B6A412D37EC400DA6239 /* cmdargs.cpp */, + A7E9B6A512D37EC400DA6239 /* cmdargs.h */, A7E9B6B312D37EC400DA6239 /* common.h */, A7E9B6CF12D37EC400DA6239 /* distclearcut.cpp */, A7E9B6D012D37EC400DA6239 /* distclearcut.h */, @@ -2292,9 +2292,12 @@ ALWAYS_SEARCH_USER_PATHS = NO; COPY_PHASE_STRIP = NO; DEPLOYMENT_LOCATION = YES; + DSTROOT = TARGET_BUILD_DIR; + "DSTROOT[sdk=*]" = TARGET_BUILD_DIR; GCC_DYNAMIC_NO_PIC = NO; GCC_MODEL_TUNING = G5; GCC_OPTIMIZATION_LEVEL = 3; + "INSTALL_PATH[sdk=*]" = TARGET_BUILD_DIR; PRODUCT_NAME = mothur; SDKROOT = macosx10.6; SKIP_INSTALL = NO; @@ -2307,6 +2310,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEPLOYMENT_LOCATION = YES; + DSTROOT = TARGET_BUILD_DIR; GCC_MODEL_TUNING = G5; GCC_OPTIMIZATION_LEVEL = 3; GCC_WARN_UNUSED_VALUE = YES; @@ -2400,7 +2404,7 @@ 1DEB928708733DD80010E9CD /* Release */, ); defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; + defaultConfigurationName = Debug; }; 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = { isa = XCConfigurationList; @@ -2409,7 +2413,7 @@ 1DEB928B08733DD80010E9CD /* Release */, ); defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; + defaultConfigurationName = Debug; }; /* End XCConfigurationList section */ }; diff --git a/bayesian.cpp b/bayesian.cpp index 49be4af..cf70010 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -255,9 +255,8 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { /**************************************************************************************************/ Bayesian::~Bayesian() { try { - - delete phyloTree; - if (database != NULL) { delete database; } + if (phyloTree != NULL) { delete phyloTree; } + if (database != NULL) { delete database; } } catch(exception& e) { m->errorOut(e, "Bayesian", "~Bayesian"); diff --git a/classify.cpp b/classify.cpp index 15ef0aa..36179f4 100644 --- a/classify.cpp +++ b/classify.cpp @@ -236,7 +236,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } /**************************************************************************************************/ -Classify::Classify() { m = MothurOut::getInstance(); database = NULL; flipped=false; } +Classify::Classify() { m = MothurOut::getInstance(); database = NULL; phyloTree=NULL; flipped=false; } /**************************************************************************************************/ int Classify::readTaxonomy(string file) { diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 7f262bf..0012cd2 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -634,7 +634,7 @@ int ClassifySeqsCommand::execute(){ m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); string baseTName = m->getSimpleName(taxonomyFileName); - if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy(); } + if (taxonomyFileName == "saved") { baseTName = rdb->getSavedTaxonomy(); } //set rippedTaxName to string RippedTaxName = ""; @@ -897,6 +897,7 @@ int ClassifySeqsCommand::execute(){ } #endif } + delete classify; m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -916,7 +917,7 @@ int ClassifySeqsCommand::execute(){ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } } - delete classify; + return 0; } diff --git a/clearcutcommand.cpp b/clearcutcommand.cpp index 6a0eb25..55fbe0a 100644 --- a/clearcutcommand.cpp +++ b/clearcutcommand.cpp @@ -269,51 +269,50 @@ int ClearcutCommand::execute() { vector cPara; - char* tempClearcut = new char[8]; - strcpy(tempClearcut, "clearcut"); + char* tempClearcut = new char[9]; + *tempClearcut = '\0'; strncat(tempClearcut, "clearcut", 8); cPara.push_back(tempClearcut); //you gave us a distance matrix - if (phylipfile != "") { char* temp = new char[10]; strcpy(temp, "--distance"); cPara.push_back(temp); } + if (phylipfile != "") { char* temp = new char[11]; *temp = '\0'; strncat(temp, "--distance", 10); cPara.push_back(temp); } //you gave us a fastafile - if (fastafile != "") { char* temp = new char[11]; strcpy(temp, "--alignment"); cPara.push_back(temp); } + if (fastafile != "") { char* temp = new char[12]; *temp = '\0'; strncat(temp, "--alignment", 11); cPara.push_back(temp); } - if (version) { char* temp = new char[9]; strcpy(temp, "--version"); cPara.push_back(temp); } - if (verbose) { char* temp = new char[9]; strcpy(temp, "--verbose"); cPara.push_back(temp); } - if (quiet) { char* temp = new char[7]; strcpy(temp, "--quiet"); cPara.push_back(temp); } + if (version) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--version", 9); cPara.push_back(temp); } + if (verbose) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--verbose", 9); cPara.push_back(temp); } + if (quiet) { char* temp = new char[8]; *temp = '\0'; strncat(temp, "--quiet", 7); cPara.push_back(temp); } if (seed != "*") { string tempSeed = "--seed=" + seed; - char* temp = new char[tempSeed.length()]; - strcpy(temp, tempSeed.c_str()); + char* temp = new char[tempSeed.length()+1]; + *temp = '\0'; strncat(temp, tempSeed.c_str(), tempSeed.length()); cPara.push_back(temp); } - if (norandom) { char* temp = new char[10]; strcpy(temp, "--norandom"); cPara.push_back(temp); } - if (shuffle) { char* temp = new char[9]; strcpy(temp, "--shuffle"); cPara.push_back(temp); } - if (neighbor) { char* temp = new char[10]; strcpy(temp, "--neighbor"); cPara.push_back(temp); } + if (norandom) { char* temp = new char[11]; *temp = '\0'; strncat(temp, "--norandom", 10); cPara.push_back(temp); } + if (shuffle) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--shuffle", 9); cPara.push_back(temp); } + if (neighbor) { char* temp = new char[11]; *temp = '\0'; strncat(temp, "--neighbor", 10); cPara.push_back(temp); } string tempIn = "--in=" + inputFile; - char* tempI = new char[tempIn.length()]; - strcpy(tempI, tempIn.c_str()); + char* tempI = new char[tempIn.length()+1]; + *tempI = '\0'; strncat(tempI, tempIn.c_str(), tempIn.length()); cPara.push_back(tempI); - if (stdoutWanted) { char* temp = new char[8]; strcpy(temp, "--stdout"); cPara.push_back(temp); } + if (stdoutWanted) { char* temp = new char[9]; *temp = '\0'; strncat(temp, "--stdout", 8); cPara.push_back(temp); } else{ string tempOut = "--out=" + outputName; - - char* temp = new char[tempOut.length()]; - strcpy(temp, tempOut.c_str()); + char* temp = new char[tempOut.length()+1]; + *temp = '\0'; strncat(temp, tempOut.c_str(), tempOut.length()); cPara.push_back(temp); } - if (DNA) { char* temp = new char[5]; strcpy(temp, "--DNA"); cPara.push_back(temp); } - if (protein) { char* temp = new char[9]; strcpy(temp, "--protein"); cPara.push_back(temp); } - if (jukes) { char* temp = new char[7]; strcpy(temp, "--jukes"); cPara.push_back(temp); } - if (kimura) { char* temp = new char[8]; strcpy(temp, "--kimura"); cPara.push_back(temp); } + if (DNA) { char* temp = new char[6]; *temp = '\0'; strncat(temp, "--DNA", 5); cPara.push_back(temp); } + if (protein) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--protein", 9); cPara.push_back(temp); } + if (jukes) { char* temp = new char[8]; *temp = '\0'; strncat(temp, "--jukes", 7); cPara.push_back(temp); } + if (kimura) { char* temp = new char[9]; *temp = '\0'; strncat(temp, "--kimura", 8); cPara.push_back(temp); } if (matrixout != "") { string tempMatrix = "--matrixout=" + outputDir + matrixout; - char* temp = new char[tempMatrix.length()]; - strcpy(temp, tempMatrix.c_str()); + char* temp = new char[tempMatrix.length()+1]; + *temp = '\0'; strncat(temp, tempMatrix.c_str(), tempMatrix.length()); cPara.push_back(temp); outputNames.push_back((outputDir + matrixout)); outputTypes["matrixout"].push_back((outputDir + matrixout)); @@ -321,13 +320,13 @@ int ClearcutCommand::execute() { if (ntrees != "1") { string tempNtrees = "--ntrees=" + ntrees; - char* temp = new char[tempNtrees.length()]; - strcpy(temp, tempNtrees.c_str()); + char* temp = new char[tempNtrees.length()+1]; + *temp = '\0'; strncat(temp, tempNtrees.c_str(), tempNtrees.length()); cPara.push_back(temp); } - if (expblen) { char* temp = new char[9]; strcpy(temp, "--expblen"); cPara.push_back(temp); } - if (expdist) { char* temp = new char[9]; strcpy(temp, "--expdist"); cPara.push_back(temp); } + if (expblen) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--expblen", 9); cPara.push_back(temp); } + if (expdist) { char* temp = new char[10]; *temp = '\0'; strncat(temp, "--expdist", 9); cPara.push_back(temp); } char** clearcutParameters; clearcutParameters = new char*[cPara.size()]; diff --git a/clustercommand.cpp b/clustercommand.cpp index 06e627a..b46e1fb 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -312,7 +312,7 @@ int ClusterCommand::execute(){ ct = new CountTable(); ct->readTable(countfile); read->read(ct); - } + }else { read->read(nameMap); } list = read->getListVector(); matrix = read->getDMatrix(); diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index b3ce0f9..09c2da8 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -1285,7 +1285,7 @@ string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile ct = new CountTable(); ct->readTable(thisNamefile); read->read(ct); - } + }else { read->read(nameMap); } list = read->getListVector(); oldList = *list; diff --git a/getlineagecommand.cpp b/getlineagecommand.cpp index 645655d..99359bd 100644 --- a/getlineagecommand.cpp +++ b/getlineagecommand.cpp @@ -814,20 +814,27 @@ int GetLineageCommand::readTax(){ /**************************************************************************************************/ vector< map > GetLineageCommand::getTaxons(string tax) { try { - + vector< map > t; string taxon = ""; int taxLength = tax.length(); + for(int i=0;iisNumeric1(confidenceScore)) { //its a confidence + newtaxon = taxon.substr(0, openParen); //rip off confidence + confidence = taxon.substr((openParen+1), (closeParen-openParen-1)); + }else { //its part of the taxon + newtaxon = taxon; + confidence = "0"; + } }else{ newtaxon = taxon; confidence = "0"; @@ -837,12 +844,13 @@ vector< map > GetLineageCommand::getTaxons(string tax) { map temp; temp[newtaxon] = con; + t.push_back(temp); - taxon = ""; } else{ taxon += tax[i]; + } } diff --git a/listvector.cpp b/listvector.cpp index 9369a12..2758c94 100644 --- a/listvector.cpp +++ b/listvector.cpp @@ -13,6 +13,27 @@ #include "ordervector.hpp" #include "listvector.hpp" +//sorts highest to lowest +/***********************************************************************/ +inline bool abundNamesSort(string left, string right){ + + int countLeft = 0; + if(left != ""){ + countLeft = 1; + for(int i=0;i countRight) { + return true; + } + return false; +} /***********************************************************************/ @@ -135,9 +156,12 @@ void ListVector::print(ostream& output){ try { output << label << '\t' << numBins << '\t'; - for(int i=0;i hold = data; + sort(hold.begin(), hold.end(), abundNamesSort); + + for(int i=0;i thisLookup){ string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + "." + getOutputFileNameTag("phylip"); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); + ofstream outDist; m->openOutputFile(distFileName, outDist); outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); @@ -687,7 +688,7 @@ int MatrixOutputCommand::process(vector thisLookup){ for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } } - if (iters != 1) { + if (iters != 0) { //we need to find the average distance and standard deviation for each groups distance vector< vector > calcAverages; calcAverages.resize(matrixCalculators.size()); diff --git a/mothurout.cpp b/mothurout.cpp index 37c0916..124fbb8 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1183,7 +1183,7 @@ string MothurOut::sortFile(string distFile, string outputDir){ string firstName, secondName; float dist; - while (input) { + while (!input.eof()) { input >> firstName >> secondName >> dist; output << dist << '\t' << firstName << '\t' << secondName << endl; gobble(input); @@ -1199,16 +1199,17 @@ string MothurOut::sortFile(string distFile, string outputDir){ //read in sorted file and put distance at end again ifstream input2; + ofstream output2; openInputFile(tempOutfile, input2); - openOutputFile(outfile, output); + openOutputFile(outfile, output2); - while (input2) { + while (!input2.eof()) { input2 >> dist >> firstName >> secondName; - output << firstName << '\t' << secondName << '\t' << dist << endl; + output2 << firstName << '\t' << secondName << '\t' << dist << endl; gobble(input2); } input2.close(); - output.close(); + output2.close(); //remove temp files mothurRemove(tempDistFile); @@ -2470,6 +2471,9 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){ //This function parses the estimator options and puts them in a vector void MothurOut::splitAtChar(string& estim, vector& container, char symbol) { try { + + if (symbol == '-') { splitAtDash(estim, container); return; } + string individual = ""; int estimLength = estim.size(); for(int i=0;igetSavedTaxonomy(); } string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum")); ifstream FileTest(taxFileNameTest.c_str()); @@ -72,6 +74,7 @@ PhyloSummary::PhyloSummary(string refTfile, GroupMap* g){ ct = NULL; //check for necessary files + if (refTfile == "saved") { ReferenceDB* rdb = ReferenceDB::getInstance(); refTfile = rdb->getSavedTaxonomy(); } string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum")); ifstream FileTest(taxFileNameTest.c_str()); diff --git a/removelineagecommand.cpp b/removelineagecommand.cpp index 2b930b5..56b24cd 100644 --- a/removelineagecommand.cpp +++ b/removelineagecommand.cpp @@ -814,25 +814,30 @@ vector< map > RemoveLineageCommand::getTaxons(string tax) { int taxLength = tax.length(); for(int i=0;iisNumeric1(confidenceScore)) { //its a confidence + newtaxon = taxon.substr(0, openParen); //rip off confidence + confidence = taxon.substr((openParen+1), (closeParen-openParen-1)); + }else { //its part of the taxon + newtaxon = taxon; + confidence = "0"; + } }else{ newtaxon = taxon; confidence = "0"; - } + } float con = 0; convert(confidence, con); map temp; temp[newtaxon] = con; t.push_back(temp); - taxon = ""; } else{ diff --git a/splitgroupscommand.cpp b/splitgroupscommand.cpp index f3c6cd9..bc52112 100644 --- a/splitgroupscommand.cpp +++ b/splitgroupscommand.cpp @@ -36,14 +36,14 @@ vector SplitGroupCommand::setParameters(){ string SplitGroupCommand::getHelpString(){ try { string helpString = ""; - helpString += "The split.group command reads a group or count file, and parses your fasta and names or count files by groups. \n"; - helpString += "The split.group command parameters are fasta, name, group, count and groups.\n"; + helpString += "The split.groups command reads a group or count file, and parses your fasta and names or count files by groups. \n"; + helpString += "The split.groups command parameters are fasta, name, group, count and groups.\n"; helpString += "The fasta and group or count parameters are required.\n"; helpString += "The groups parameter allows you to select groups to create files for. \n"; helpString += "For example if you set groups=A-B-C, you will get a .A.fasta, .A.names, .B.fasta, .B.names, .C.fasta, .C.names files. \n"; helpString += "If you want .fasta and .names files for all groups, set groups=all. \n"; - helpString += "The split.group command should be used in the following format: split.group(fasta=yourFasta, group=yourGroupFile).\n"; - helpString += "Example: split.group(fasta=abrecovery.fasta, group=abrecovery.groups).\n"; + helpString += "The split.groups command should be used in the following format: split.group(fasta=yourFasta, group=yourGroupFile).\n"; + helpString += "Example: split.groups(fasta=abrecovery.fasta, group=abrecovery.groups).\n"; helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; return helpString; } diff --git a/splitmatrix.cpp b/splitmatrix.cpp index 28bc5d4..aea99ef 100644 --- a/splitmatrix.cpp +++ b/splitmatrix.cpp @@ -637,7 +637,7 @@ int SplitMatrix::splitNames(map& seqGroup, int numGroups, vectoropenOutputFile(newtempNameFile, out); - out << headers << endl; + out << "Representative_Sequence\ttotal" << endl; out.close(); m->appendFiles(tempNameFile, newtempNameFile); m->mothurRemove(tempNameFile); @@ -671,7 +671,7 @@ int SplitMatrix::splitNames(map& seqGroup, int numGroups, vectoropenOutputFile(newtempNameFile, out); - out << headers << endl; + out << "Representative_Sequence\ttotal" << endl; out.close(); m->appendFiles(singleton, newtempNameFile); m->mothurRemove(singleton); diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index bba6289..8ee11d3 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -453,6 +453,7 @@ int TreeGroupCommand::execute(){ readMatrix->setCutoff(cutoff); ct = NULL; + nameMap = NULL; if(namefile != ""){ nameMap = new NameAssignment(namefile); nameMap->readMap(); @@ -461,6 +462,8 @@ int TreeGroupCommand::execute(){ ct = new CountTable(); ct->readTable(countfile); readMatrix->read(ct); + }else { + readMatrix->read(nameMap); } list = readMatrix->getListVector(); diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index e698fff..541131e 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -305,7 +305,8 @@ int UnifracWeightedCommand::execute() { string s; //to make work with setgroups Groups = m->getGroups(); vector nameGroups = ct->getNamesOfGroups(); - util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze + if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } + util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze m->setGroups(Groups); if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }