From 36a867cbd85d9c276d3c8d13f25a150bbbe2466b Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 4 Apr 2011 14:36:21 +0000 Subject: [PATCH] sped up the remove.seqs and remove.lineage and get.seqs and get.lineage. fixed bug with nast removeextrgaps --- chimeraslayercommand.cpp | 2 +- getlineagecommand.cpp | 10 +--------- getseqscommand.cpp | 10 +--------- nast.cpp | 18 +++++++++--------- removelineagecommand.cpp | 10 +--------- removeseqscommand.cpp | 21 +++++++-------------- 6 files changed, 20 insertions(+), 51 deletions(-) diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index e7b02b8..9b4a87b 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -684,7 +684,7 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f } - count++; + count++; } delete candidateSeq; diff --git a/getlineagecommand.cpp b/getlineagecommand.cpp index 2a5505f..7950680 100644 --- a/getlineagecommand.cpp +++ b/getlineagecommand.cpp @@ -442,15 +442,7 @@ int GetLineageCommand::readName(){ if (dups) { hold = secondCol; } vector parsedNames; - //parse second column saving each name - while (secondCol.find_first_of(',') != -1) { - name = secondCol.substr(0,secondCol.find_first_of(',')); - secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length()); - parsedNames.push_back(name); - } - - //get name after last , - parsedNames.push_back(secondCol); + m->splitAtComma(secondCol, parsedNames); vector validSecond; for (int i = 0; i < parsedNames.size(); i++) { diff --git a/getseqscommand.cpp b/getseqscommand.cpp index 6d4bff2..ce820fe 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -539,15 +539,7 @@ int GetSeqsCommand::readName(){ if (dups) { hold = secondCol; } vector parsedNames; - //parse second column saving each name - while (secondCol.find_first_of(',') != -1) { - name = secondCol.substr(0,secondCol.find_first_of(',')); - secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length()); - parsedNames.push_back(name); - } - - //get name after last , - parsedNames.push_back(secondCol); + m->splitAtComma(secondCol, parsedNames); vector validSecond; for (int i = 0; i < parsedNames.size(); i++) { diff --git a/nast.cpp b/nast.cpp index bf65fa2..647e0e4 100644 --- a/nast.cpp +++ b/nast.cpp @@ -139,24 +139,22 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl string leftCandidateString = candAln.substr(0,(leftIndex-insertLength+1)); string rightCandidateString = candAln.substr((leftIndex+1)); candAln = leftCandidateString + rightCandidateString; - - } - else{ // not enough room to the left, have to steal some space to + + }else{ // not enough room to the left, have to steal some space to the right //cout << "in else lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << insertLength << endl; - string leftTemplateString = newTemplateAlign.substr(0,i); // the right + string leftTemplateString = newTemplateAlign.substr(0,i); string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); - //cout << " in else lr candAln = " << candAln.length() << '\t' << " leftIndex = " << leftIndex << " leftroom = " << leftRoom << " rightIndex = " << rightIndex << '\t' << endl; + //cout << " in else lr candAln = " << candAln.length() << '\t' << " leftIndex = " << leftIndex << " leftroom = " << leftRoom << " rightIndex = " << rightIndex << '\t' << " rightroom = " << rightRoom << '\t' << endl; string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); string rightCandidateString = candAln.substr((rightIndex+(insertLength-leftRoom))); candAln = leftCandidateString + insertString + rightCandidateString; } - } - else{ // the right gap is closer - > move stuff right there's + }else{ // the right gap is closer - > move stuff right there's if(rightRoom >= insertLength){ // enough room to the right to move //cout << "rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; string leftTemplateString = newTemplateAlign.substr(0,i); @@ -184,7 +182,9 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl } } - i -= insertLength; + + if ((i - insertLength) < 0) { i = 0; } + else { i -= insertLength; } } else{ @@ -207,7 +207,7 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl // i -= insertLength; //if i is negative, we want to remove the extra gaps to the right - if (i < 0) { cout << "i is negative" << endl; } + if (i < 0) { m->mothurOut("i is negative"); m->mothurOutEndLine(); } } } } diff --git a/removelineagecommand.cpp b/removelineagecommand.cpp index ef1e77e..ebd0608 100644 --- a/removelineagecommand.cpp +++ b/removelineagecommand.cpp @@ -434,16 +434,8 @@ int RemoveLineageCommand::readName(){ in >> secondCol; vector parsedNames; - //parse second column saving each name - while (secondCol.find_first_of(',') != -1) { - name = secondCol.substr(0,secondCol.find_first_of(',')); - secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length()); - parsedNames.push_back(name); - } + m->splitAtComma(secondCol, parsedNames); - //get name after last , - parsedNames.push_back(secondCol); - vector validSecond; validSecond.clear(); for (int i = 0; i < parsedNames.size(); i++) { if (names.count(parsedNames[i]) == 0) { diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp index 1990e9c..02cb5d9 100644 --- a/removeseqscommand.cpp +++ b/removeseqscommand.cpp @@ -513,24 +513,17 @@ int RemoveSeqsCommand::readName(){ string name, firstCol, secondCol; bool wroteSomething = false; + while(!in.eof()){ if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str()); return 0; } - + in >> firstCol; m->gobble(in); in >> secondCol; - + vector parsedNames; - //parse second column saving each name - while (secondCol.find_first_of(',') != -1) { - name = secondCol.substr(0,secondCol.find_first_of(',')); - secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length()); - parsedNames.push_back(name); - } + m->splitAtComma(secondCol, parsedNames); - //get name after last , - parsedNames.push_back(secondCol); - vector validSecond; validSecond.clear(); for (int i = 0; i < parsedNames.size(); i++) { if (names.count(parsedNames[i]) == 0) { @@ -541,7 +534,7 @@ int RemoveSeqsCommand::readName(){ if ((dups) && (validSecond.size() != parsedNames.size())) { //if dups is true and we want to get rid of anyone, get rid of everyone for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); } }else { - //if the name in the first column is in the set then print it and any other names in second column also in set + //if the name in the first column is in the set then print it and any other names in second column also in set if (names.count(firstCol) == 0) { wroteSomething = true; @@ -572,10 +565,10 @@ int RemoveSeqsCommand::readName(){ } in.close(); out.close(); - + if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); } outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName); - + return 0; } catch(exception& e) { -- 2.39.2