From 64581f6d0e63e67d4e119601bea695ebb3f52a13 Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 18 Apr 2011 17:55:59 +0000 Subject: [PATCH] modified chimera.slayer template=self --- Mothur.xcodeproj/project.pbxproj | 10 +- chimeraslayer.cpp | 201 +++++++++++-------------------- chimeraslayer.h | 14 ++- chimeraslayercommand.cpp | 74 ++++++++++-- chimeraslayercommand.h | 4 +- mothur.h | 14 +++ mothurout.cpp | 36 ++++++ mothurout.h | 1 + 8 files changed, 197 insertions(+), 157 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 6bc5e17..d3b9cfc 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1980,8 +1980,8 @@ GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( "MOTHUR_FILES=\"\\\"../release\\\"\"", - "VERSION=\"\\\"1.18.0\\\"\"", - "RELEASE_DATE=\"\\\"4/11/2011\\\"\"", + "VERSION=\"\\\"1.18.1\\\"\"", + "RELEASE_DATE=\"\\\"4/15/2011\\\"\"", ); GCC_WARN_ABOUT_MISSING_NEWLINE = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES; @@ -2013,8 +2013,8 @@ GCC_MODEL_TUNING = ""; GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( - "VERSION=\"\\\"1.18.0\\\"\"", - "RELEASE_DATE=\"\\\"4/11/2011\\\"\"", + "VERSION=\"\\\"1.18.1\\\"\"", + "RELEASE_DATE=\"\\\"4/15/2011\\\"\"", ); GCC_WARN_ABOUT_MISSING_NEWLINE = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES; @@ -2036,7 +2036,7 @@ "-lreadline", ); PREBINDING = NO; - SDKROOT = macosx10.5; + SDKROOT = macosx10.6; }; name = Release; }; diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 77a8695..8c9417a 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -45,7 +45,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num } } //*************************************************************************************************************** -ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { try { fastafile = file; templateSeqs = readSeqs(fastafile); @@ -65,78 +65,21 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, s increment = inc; numWanted = numw; realign = r; - includeAbunds = abunds; trimChimera = trim; - //read name file and create nameMapRank - readNameFile(name); - decalc = new DeCalculator(); createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap //run filter on template - for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]); } - + for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } templateSeqs.clear(); + } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); exit(1); } } -//*************************************************************************************************************** -int ChimeraSlayer::readNameFile(string name) { - try { - ifstream in; - m->openInputFile(name, in); - - int maxRank = 0; - int minRank = 10000000; - - while(!in.eof()){ - - if (m->control_pressed) { in.close(); return 0; } - - string thisname, repnames; - - in >> thisname; m->gobble(in); //read from first column - in >> repnames; //read from second column - - map >::iterator it = nameMapRank.find(thisname); - if (it == nameMapRank.end()) { - - vector splitRepNames; - m->splitAtComma(repnames, splitRepNames); - - nameMapRank[thisname] = splitRepNames; - - if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); } - if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); } - - }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); } - - m->gobble(in); - } - in.close(); - - //sanity check to make sure files match - for (int i = 0; i < templateSeqs.size(); i++) { - map >::iterator it = nameMapRank.find(templateSeqs[i]->getName()); - - if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true; } - } - - if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } - - return 0; - - } - catch(exception& e) { - m->errorOut(e, "ChimeraSlayer", "readNameFile"); - exit(1); - } -} - //*************************************************************************************************************** int ChimeraSlayer::doPrep() { try { @@ -274,49 +217,9 @@ int ChimeraSlayer::doPrep() { } } //*************************************************************************************************************** -vector ChimeraSlayer::getTemplate(Sequence* q) { +int ChimeraSlayer::getTemplate(Sequence* q) { try { - vector thisTemplate; - - int thisRank; - string thisName = q->getName(); - map >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked - thisRank = (itRank->second).size(); - - //create list of names we want to put into the template - set namesToAdd; - for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) { - if (itRank->first != thisName) { - if (includeAbunds == "greaterequal") { - if ((itRank->second).size() >= thisRank) { - //you are more abundant than me or equal to my abundance - for (int i = 0; i < (itRank->second).size(); i++) { - namesToAdd.insert((itRank->second)[i]); - } - } - }else if (includeAbunds == "greater") { - if ((itRank->second).size() > thisRank) { - //you are more abundant than me - for (int i = 0; i < (itRank->second).size(); i++) { - namesToAdd.insert((itRank->second)[i]); - } - } - }else if (includeAbunds == "all") { - //add everyone - for (int i = 0; i < (itRank->second).size(); i++) { - namesToAdd.insert((itRank->second)[i]); - } - } - } - } - - for (int i = 0; i < templateSeqs.size(); i++) { - if (namesToAdd.count(templateSeqs[i]->getName()) != 0) { - thisTemplate.push_back(templateSeqs[i]); - } - } - string kmerDBNameLeft; string kmerDBNameRight; @@ -329,70 +232,70 @@ vector ChimeraSlayer::getTemplate(Sequence* q) { string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName)); databaseLeft = new KmerDB(leftTemplateFileName, kmerSize); #ifdef USE_MPI - for (int i = 0; i < thisTemplate.size(); i++) { + for (int i = 0; i < userTemplate.size(); i++) { - if (m->control_pressed) { return thisTemplate; } + if (m->control_pressed) { return 0; } - string leftFrag = thisTemplate[i]->getUnaligned(); + string leftFrag = userTemplate[i]->getUnaligned(); leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); - Sequence leftTemp(thisTemplate[i]->getName(), leftFrag); + Sequence leftTemp(userTemplate[i]->getName(), leftFrag); databaseLeft->addSequence(leftTemp); } databaseLeft->generateDB(); - databaseLeft->setNumSeqs(thisTemplate.size()); + databaseLeft->setNumSeqs(userTemplate.size()); - for (int i = 0; i < thisTemplate.size(); i++) { - if (m->control_pressed) { return thisTemplate; } + for (int i = 0; i < userTemplate.size(); i++) { + if (m->control_pressed) { return 0; } - string rightFrag = thisTemplate[i]->getUnaligned(); + string rightFrag = userTemplate[i]->getUnaligned(); rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); - Sequence rightTemp(thisTemplate[i]->getName(), rightFrag); + Sequence rightTemp(userTemplate[i]->getName(), rightFrag); databaseRight->addSequence(rightTemp); } databaseRight->generateDB(); - databaseRight->setNumSeqs(thisTemplate.size()); + databaseRight->setNumSeqs(userTemplate.size()); #else - for (int i = 0; i < thisTemplate.size(); i++) { + for (int i = 0; i < userTemplate.size(); i++) { - if (m->control_pressed) { return thisTemplate; } + if (m->control_pressed) { return 0; } - string leftFrag = thisTemplate[i]->getUnaligned(); + string leftFrag = userTemplate[i]->getUnaligned(); leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); - Sequence leftTemp(thisTemplate[i]->getName(), leftFrag); + Sequence leftTemp(userTemplate[i]->getName(), leftFrag); databaseLeft->addSequence(leftTemp); } databaseLeft->generateDB(); - databaseLeft->setNumSeqs(thisTemplate.size()); + databaseLeft->setNumSeqs(userTemplate.size()); - for (int i = 0; i < thisTemplate.size(); i++) { - if (m->control_pressed) { return thisTemplate; } + for (int i = 0; i < userTemplate.size(); i++) { + if (m->control_pressed) { return 0; } - string rightFrag = thisTemplate[i]->getUnaligned(); + string rightFrag = userTemplate[i]->getUnaligned(); rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); - Sequence rightTemp(thisTemplate[i]->getName(), rightFrag); + Sequence rightTemp(userTemplate[i]->getName(), rightFrag); databaseRight->addSequence(rightTemp); } databaseRight->generateDB(); - databaseRight->setNumSeqs(thisTemplate.size()); + databaseRight->setNumSeqs(userTemplate.size()); #endif }else if (searchMethod == "blast") { //generate blastdb databaseLeft = new BlastDB(-1.0, -1.0, 1, -3); - for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; } databaseLeft->addSequence(*thisTemplate[i]); } + for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return 0; } databaseLeft->addSequence(*userTemplate[i]); } databaseLeft->generateDB(); - databaseLeft->setNumSeqs(thisTemplate.size()); + databaseLeft->setNumSeqs(userTemplate.size()); } - return thisTemplate; + return 0; } catch(exception& e) { @@ -407,6 +310,12 @@ ChimeraSlayer::~ChimeraSlayer() { if (templateFileName != "self") { if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } else if (searchMethod == "blast") { delete databaseLeft; } + }else { + //delete userTemplate + for (int i = 0; i < userTemplate.size(); i++) { + delete userTemplate[i]; + } + userTemplate.clear(); } } //*************************************************************************************************************** @@ -421,7 +330,7 @@ void ChimeraSlayer::printHeader(ostream& out) { Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { try { Sequence* trim = NULL; - if (trimChimera) { trim = trimQuery; } + if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); } if (chimeraFlags == "yes") { string chimeraFlag = "no"; @@ -448,13 +357,19 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { } trim->setAligned(newAligned); } - } } printBlock(chimeraResults[0], chimeraFlag, out); out << endl; - }else { out << querySeq->getName() << "\tno" << endl; } + }else { + out << querySeq->getName() << "\tno" << endl; + if (templateFileName == "self") { + Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); + runFilter(temp); + userTemplate.push_back(temp); + } + } return trim; @@ -553,7 +468,14 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out); out << endl; - }else { out << querySeq->getName() << "\tno" << endl; } + }else { + out << querySeq->getName() << "\tno" << endl; + if (templateFileName == "self") { + Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); + runFilter(temp); + userTemplate.push_back(temp); + } + } return trim; @@ -688,6 +610,12 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); delete buf; + + if (template == "self") { + Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); + runFilter(temp); + userTemplate.push_back(temp); + } } @@ -707,7 +635,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { string outputString = ""; Sequence* trim = NULL; - if (trimChimera) { trim = trimQuery; } + if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); } if (chimeraFlags == "yes") { string chimeraFlag = "no"; @@ -766,6 +694,12 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); delete buf; + + if (template == "self") { + Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); + runFilter(temp); + userTemplate.push_back(temp); + } } return trim; @@ -780,10 +714,9 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { //*************************************************************************************************************** int ChimeraSlayer::getChimeras(Sequence* query) { try { - if (trimChimera) { - trimQuery = new Sequence(query->getName(), query->getAligned()); - printResults.trimQuery = *trimQuery; - } + + trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned()); + printResults.trimQuery = trimQuery; chimeraFlags = "no"; printResults.flag = "no"; @@ -797,7 +730,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) { //you must create a template vector thisTemplate; if (templateFileName != "self") { thisTemplate = templateSeqs; } - else { thisTemplate = getTemplate(query); } //fills this template and creates the databases + else { getTemplate(query); thisTemplate = userTemplate; } //fills this template and creates the databases if (m->control_pressed) { return 0; } diff --git a/chimeraslayer.h b/chimeraslayer.h index 3e76297..cac96eb 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -23,7 +23,7 @@ class ChimeraSlayer : public Chimera { public: ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); - ChimeraSlayer(string, string, bool, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); + ChimeraSlayer(string, string, bool, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); ~ChimeraSlayer(); @@ -41,16 +41,18 @@ class ChimeraSlayer : public Chimera { private: Sequence* querySeq; - Sequence* trimQuery; + Sequence trimQuery; DeCalculator* decalc; map spotMap; Database* databaseRight; Database* databaseLeft; - map > nameMapRank; //sequence name to rank so you can construct a template of the abundant sequences if the user uses itself as template + vector userTemplate; //when template=self, the query file is sorted from most abundance to least abundant + //userTemplate grows as the query file is processed by adding sequences that are not chimeric + set namesOfChimericSeqs; //only used when template=self vector chimeraResults; data_results printResults; - string chimeraFlags, searchMethod, fastafile, includeAbunds; + string chimeraFlags, searchMethod, fastafile; bool realign, trimChimera; int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment; float divR; @@ -59,8 +61,8 @@ class ChimeraSlayer : public Chimera { void printBlock(data_results, data_results, bool, bool, string, ostream&); string getBlock(data_struct, string); string getBlock(data_results, data_results, bool, bool, string); - int readNameFile(string); - vector getTemplate(Sequence*); + //int readNameFile(string); + int getTemplate(Sequence*); }; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 2e89798..1a9c1fb 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -26,7 +26,6 @@ vector ChimeraSlayerCommand::setParameters(){ CommandParameter pminsnp("minsnp", "Number", "", "100", "", "", "",false,false); parameters.push_back(pminsnp); CommandParameter pminbs("minbs", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminbs); CommandParameter psearch("search", "Multiple", "kmer-blast-distance", "distance", "", "", "",false,false); parameters.push_back(psearch); - CommandParameter pinclude("include", "Multiple", "greater-greaterequal-all", "greater", "", "", "",false,false); parameters.push_back(pinclude); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter prealign("realign", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prealign); CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptrim); @@ -59,7 +58,6 @@ string ChimeraSlayerCommand::getHelpString(){ helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; - helpString += "The include parameter is used when template=self and allows you to choose which sequences will make up the \"template\". Options are greater, greaterequal and all, default=greater, meaning sequences with greater abundance than the query sequence. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; #ifdef USE_MPI helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n"; @@ -282,9 +280,6 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { m->setProcessors(temp); convert(temp, processors); - includeAbunds = validParameter.validFile(parameters, "include", false); if (includeAbunds == "not found") { includeAbunds = "greater"; } - if ((includeAbunds != "greater") && (includeAbunds != "greaterequal") && (includeAbunds != "all")) { includeAbunds = "greater"; m->mothurOut("Invalid include setting. options are greater, greaterequal or all. using greater."); m->mothurOutEndLine(); } - temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; } convert(temp, ksize); @@ -359,10 +354,11 @@ int ChimeraSlayerCommand::execute(){ if (templatefile != "self") { //you want to run slayer with a refernce template chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); }else { + if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } + string nameFile = ""; if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); + nameFile = nameFileNames[s]; }else { - m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine(); //use unique.seqs to create new name and fastafile @@ -379,11 +375,18 @@ int ChimeraSlayerCommand::execute(){ m->mothurOut("/******************************************/"); m->mothurOutEndLine(); - string nameFile = filenames["name"][0]; + nameFile = filenames["name"][0]; fastaFileNames[s] = filenames["fasta"][0]; - - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); } + + //sort fastafile by abundance, returns new sorted fastafile name + m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); + fastaFileNames[s] = sortFastaFile(fastaFileNames[s], nameFile); + m->mothurOut("Done."); m->mothurOutEndLine(); + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } + + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); } if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it @@ -935,6 +938,57 @@ int ChimeraSlayerCommand::divideInHalf(Sequence querySeq, string& leftQuery, str exit(1); } } +/**************************************************************************************************/ + +string ChimeraSlayerCommand::sortFastaFile(string fastaFile, string nameFile) { + try { + + //read through fastafile and store info + map seqs; + ifstream in; + m->openInputFile(fastaFile, in); + + while (!in.eof()) { + + if (m->control_pressed) { in.close(); return ""; } + + Sequence seq(in); m->gobble(in); + seqs[seq.getName()] = seq.getAligned(); + } + + in.close(); + + //read namefile + vector nameMapCount; + int error = m->readNames(nameFile, nameMapCount, seqs); + + if (m->control_pressed) { return ""; } + + if (error == 1) { m->control_pressed = true; return ""; } + if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return ""; } + + sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes); + + string newFasta = fastaFile + ".temp"; + ofstream out; + m->openOutputFile(newFasta, out); + + //print new file in order of + for (int i = 0; i < nameMapCount.size(); i++) { + out << ">" << nameMapCount[i].name << endl << nameMapCount[i].seq << endl; + } + out.close(); + + rename(newFasta.c_str(), fastaFile.c_str()); + + return fastaFile; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile"); + exit(1); + } +} /**************************************************************************************************/ diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index 02ebe72..6ca0310 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -14,7 +14,6 @@ #include "command.hpp" #include "chimera.h" - /***********************************************************/ class ChimeraSlayerCommand : public Command { @@ -45,13 +44,14 @@ private: int driver(linePair*, string, string, string, string); int createProcesses(string, string, string, string); int divideInHalf(Sequence, string&, string&); + string sortFastaFile(string, string); #ifdef USE_MPI int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); #endif bool abort, realign, trim, trimera; - string fastafile, templatefile, outputDir, search, namefile, includeAbunds; + string fastafile, templatefile, outputDir, search, namefile; int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength; float divR; Chimera* chimera; diff --git a/mothur.h b/mothur.h index 190254a..53074eb 100644 --- a/mothur.h +++ b/mothur.h @@ -125,6 +125,15 @@ struct distlinePair { int end; }; +/************************************************************/ +struct seqPriorityNode { + int numIdentical; + string seq; + string name; + seqPriorityNode() {} + seqPriorityNode(int n, string s, string nm) : numIdentical(n), seq(s), name(nm) {} + ~seqPriorityNode() {} +}; /***************************************************************/ struct spearmanRank { string name; @@ -138,6 +147,11 @@ inline bool compareSpearman(spearmanRank left, spearmanRank right){ return (left.score > right.score); } //******************************************************************************************************************** +//sorts highest to lowest +inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right){ + return (left.numIdentical > right.numIdentical); +} +//******************************************************************************************************************** //sorts lowest to highest inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){ return (left.score < right.score); diff --git a/mothurout.cpp b/mothurout.cpp index 8fde2fd..6529622 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1217,6 +1217,42 @@ map MothurOut::readNames(string namefile) { exit(1); } } +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { + try { + int error = 0; + + //open input file + ifstream in; + openInputFile(namefile, in); + + while (!in.eof()) { + if (control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol >> secondCol; gobble(in); + + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + } + in.close(); + + return error; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} /***********************************************************************/ diff --git a/mothurout.h b/mothurout.h index 8138f28..9446bf9 100644 --- a/mothurout.h +++ b/mothurout.h @@ -65,6 +65,7 @@ class MothurOut { void gobble(istream&); void gobble(istringstream&); map readNames(string); + int readNames(string, vector&, map&); //searchs and checks bool checkReleaseVersion(ifstream&, string); -- 2.39.2