From ee8403d4eb5760187d62b42a9cf4272de8fc0ec4 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 12 Jun 2012 11:27:51 -0400 Subject: [PATCH] changed reading of name file to use buffered reads. note the splitWhiteSpace function is sensitive to the gobble function. do not use the two together while reading or the read can get off track. modified trim.seqs group counts to include the redundant seqs if a names file is provided. changed group maps read of a group file to be buffered. modified appendFiles functions to be buffered. --- aligncommand.cpp | 40 ++------- aligncommand.h | 1 - classifyseqscommand.cpp | 30 +------ classifyseqscommand.h | 1 - consensusseqscommand.cpp | 41 ++++----- createdatabasecommand.cpp | 28 +----- createdatabasecommand.h | 1 - groupmap.cpp | 176 +++++++++++++++++++++++++------------- mothurout.cpp | 162 +++++++++++++++++++++++++---------- mothurout.h | 1 + screenseqscommand.h | 1 - subsamplecommand.cpp | 35 ++------ trimseqscommand.cpp | 7 +- trimseqscommand.h | 6 +- 14 files changed, 280 insertions(+), 250 deletions(-) diff --git a/aligncommand.cpp b/aligncommand.cpp index 8215de3..75466f9 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -875,7 +875,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } in.close(); m->mothurRemove(tempFile); - appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName); + m->appendFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName); m->mothurRemove((alignFileName + toString(processIDS[i]) + ".temp")); appendReportFiles((reportFileName + toString(processIDS[i]) + ".temp"), reportFileName); @@ -892,7 +892,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str()); for (int h=1; h < nonBlankAccnosFiles.size(); h++) { - appendAlignFiles(nonBlankAccnosFiles[h], 
accnosFName); + m->appendFiles(nonBlankAccnosFiles[h], accnosFName); m->mothurRemove(nonBlankAccnosFiles[h]); } }else { //recreate the accnosfile if needed @@ -957,7 +957,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s else { m->mothurRemove(accnosFName); } //remove so other files can be renamed to it for (int i = 1; i < processors; i++) { - appendAlignFiles((alignFileName + toString(i) + ".temp"), alignFileName); + m->appendFiles((alignFileName + toString(i) + ".temp"), alignFileName); m->mothurRemove((alignFileName + toString(i) + ".temp")); appendReportFiles((reportFileName + toString(i) + ".temp"), reportFileName); @@ -973,7 +973,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str()); for (int h=1; h < nonBlankAccnosFiles.size(); h++) { - appendAlignFiles(nonBlankAccnosFiles[h], accnosFName); + m->appendFiles(nonBlankAccnosFiles[h], accnosFName); m->mothurRemove(nonBlankAccnosFiles[h]); } }else { //recreate the accnosfile if needed @@ -990,29 +990,6 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s exit(1); } } -/**************************************************************************************************/ - -void AlignCommand::appendAlignFiles(string temp, string filename) { - try{ - - ofstream output; - ifstream input; - m->openOutputFileAppend(filename, output); - m->openInputFile(temp, input); - - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } - } - - input.close(); - output.close(); - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "appendAlignFiles"); - exit(1); - } -} //********************************************************************************************************************** void AlignCommand::appendReportFiles(string temp, string filename) { @@ -1025,10 +1002,11 @@ void AlignCommand::appendReportFiles(string temp, string filename) 
{ while (!input.eof()) { char c = input.get(); if (c == 10 || c == 13){ break; } } // get header line - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } - } + char buffer[4096]; + while (!input.eof()) { + input.read(buffer, 4096); + output.write(buffer, input.gcount()); + } input.close(); output.close(); diff --git a/aligncommand.h b/aligncommand.h index 7eeaa1e..d4b7e78 100644 --- a/aligncommand.h +++ b/aligncommand.h @@ -55,7 +55,6 @@ private: int driver(linePair*, string, string, string, string); int createProcesses(string, string, string, string); - void appendAlignFiles(string, string); void appendReportFiles(string, string); #ifdef USE_MPI diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index b6dc24f..158069e 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -919,8 +919,8 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, else { m->mothurRemove(accnos); } //remove so other files can be renamed to it for(int i=0;iappendFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName); + m->appendFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile); if (!(m->isBlank(accnos + toString(processIDS[i]) + ".temp"))) { nonBlankAccnosFiles.push_back(accnos + toString(processIDS[i]) + ".temp"); }else { m->mothurRemove((accnos + toString(processIDS[i]) + ".temp")); } @@ -934,7 +934,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, rename(nonBlankAccnosFiles[0].c_str(), accnos.c_str()); for (int h=1; h < nonBlankAccnosFiles.size(); h++) { - appendTaxFiles(nonBlankAccnosFiles[h], accnos); + m->appendFiles(nonBlankAccnosFiles[h], accnos); m->mothurRemove(nonBlankAccnosFiles[h]); } }else { //recreate the accnosfile if needed @@ -951,30 +951,6 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, exit(1); } } 
-/**************************************************************************************************/ - -void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) { - try{ - - ofstream output; - ifstream input; - m->openOutputFileAppend(filename, output); - m->openInputFile(temp, input); - - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } - } - - input.close(); - output.close(); - } - catch(exception& e) { - m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles"); - exit(1); - } -} - //********************************************************************************************************************** int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){ diff --git a/classifyseqscommand.h b/classifyseqscommand.h index acee70c..1316a25 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -75,7 +75,6 @@ private: bool abort, probs, save, flip; int driver(linePair*, string, string, string, string); - void appendTaxFiles(string, string); int createProcesses(string, string, string, string); string addUnclassifieds(string, int); diff --git a/consensusseqscommand.cpp b/consensusseqscommand.cpp index 223e5db..55ec802 100644 --- a/consensusseqscommand.cpp +++ b/consensusseqscommand.cpp @@ -653,38 +653,29 @@ int ConsensusSeqsCommand::readFasta(){ int ConsensusSeqsCommand::readNames(){ try{ - - ifstream in; - m->openInputFile(namefile, in); - - string thisname, repnames; - map::iterator it; - - bool error = false; - - while(!in.eof()){ - - if (m->control_pressed) { break; } - - in >> thisname; m->gobble(in); //read from first column - in >> repnames; //read from second column - - it = nameMap.find(thisname); + map temp; + map::iterator it; + bool error = false; + + m->readNames(namefile, temp); //use central buffered read + + for (map::iterator itTemp = temp.begin(); itTemp != temp.end(); itTemp++) { + string thisname, repnames; + thisname = 
itTemp->first; + repnames = itTemp->second; + + it = nameMap.find(thisname); if (it != nameMap.end()) { //then this sequence was in the fastafile - + nameFileMap[thisname] = repnames; //for later when outputting the new namesFile if the list file is unique + vector splitRepNames; m->splitAtComma(repnames, splitRepNames); - nameFileMap[thisname] = repnames; //for later when outputting the new namesFile if the list file is unique for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; } }else{ m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct."); m->mothurOutEndLine(); error = true; } - - m->gobble(in); - } - - in.close(); - + } + if (error) { m->control_pressed = true; } return 0; diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp index 1da67e6..57d5264 100644 --- a/createdatabasecommand.cpp +++ b/createdatabasecommand.cpp @@ -209,7 +209,7 @@ int CreateDatabaseCommand::execute(){ //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map. map repNames; - int numUniqueNamesFile = readNames(repNames); + int numUniqueNamesFile = m->readNames(repnamesfile, repNames); //are there the same number of otus in the fasta and name files if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. 
These should match.\n"); m->control_pressed = true; } @@ -394,32 +394,6 @@ vector CreateDatabaseCommand::readFasta(vector& seqs){ exit(1); } } -/**********************************************************************************************************************/ -int CreateDatabaseCommand::readNames(map& nameMap) { - try { - - //open input file - ifstream in; - m->openInputFile(repnamesfile, in); - - while (!in.eof()) { - if (m->control_pressed) { break; } - - string firstCol, secondCol; - in >> firstCol >> secondCol; m->gobble(in); - - nameMap[secondCol] = firstCol; - } - in.close(); - - return nameMap.size(); - - } - catch(exception& e) { - m->errorOut(e, "CreateDatabaseCommand", "readNames"); - exit(1); - } -} //********************************************************************************************************************** ListVector* CreateDatabaseCommand::getList(){ try { diff --git a/createdatabasecommand.h b/createdatabasecommand.h index 643ff6e..37e3013 100644 --- a/createdatabasecommand.h +++ b/createdatabasecommand.h @@ -39,7 +39,6 @@ private: vector readFasta(vector&); vector readTax(vector&); - int readNames(map&); ListVector* getList(); }; diff --git a/groupmap.cpp b/groupmap.cpp index 92a43e9..612b236 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -23,84 +23,138 @@ /************************************************************/ int GroupMap::readMap() { - string seqName, seqGroup; + try { + string seqName, seqGroup; int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - it = groupmap.find(seqName); - - if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group - } - m->gobble(fileHandle); - } + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } fileHandle.close(); + m->setAllGroups(namesOfGroups); return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readMap"); + exit(1); + } } /************************************************************/ int GroupMap::readDesignMap() { - string seqName, seqGroup; + try { + string seqName, seqGroup; int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - it = groupmap.find(seqName); - - if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. 
Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group - } - m->gobble(fileHandle); - } + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } fileHandle.close(); + m->setAllGroups(namesOfGroups); return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } } /************************************************************/ int GroupMap::readDesignMap(string filename) { - groupFileName = filename; - m->openInputFile(filename, fileHandle); - index = 0; - string seqName, seqGroup; - int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - it = groupmap.find(seqName); + try { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + 
bool columnOne = true; - if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } } - m->gobble(fileHandle); + fileHandle.close(); + + m->setAllGroups(namesOfGroups); + return error; } - fileHandle.close(); - m->setAllGroups(namesOfGroups); - return error; + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } } /************************************************************/ int GroupMap::getNumGroups() { return namesOfGroups.size(); } diff --git a/mothurout.cpp b/mothurout.cpp index 6ecb86f..f98bea8 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1092,11 +1092,14 @@ int MothurOut::appendFiles(string temp, string filename) { int numLines = 0; if (ableToOpen == 0) { //you opened it - while(!input.eof()){ - char c = input.get(); - if(input.eof()) { break; } - else { output << c; if (c == '\n') {numLines++;} } - } + + char buffer[4096]; + while (!input.eof()) { + 
input.read(buffer, 4096); + output.write(buffer, input.gcount()); + //count number of lines + for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} } + } input.close(); } @@ -1454,6 +1457,30 @@ float MothurOut::ceilDist(float dist, int precision){ exit(1); } } +/***********************************************************************/ + +vector MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){ + try { + vector pieces; + + for (int i = 0; i < size; i++) { + if (!isspace(buffer[i])) { rest += buffer[i]; } + else { + pieces.push_back(rest); rest = ""; + while (i < size) { //gobble white space + if (isspace(buffer[i])) { i++; } + else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "parsePieces"); + exit(1); + } +} /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap) { try { @@ -1461,14 +1488,25 @@ int MothurOut::readNames(string namefile, map& nameMap) { //open input file ifstream in; openInputFile(namefile, in); - + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); - - nameMap[firstCol] = secondCol; + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + } } in.close(); @@ -1488,21 +1526,33 @@ int MothurOut::readNames(string namefile, map >& nameMap) ifstream in; openInputFile(namefile, in); + string rest = ""; + char 
buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); - - vector temp; - splitAtComma(secondCol, temp); - - nameMap[firstCol] = temp; + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + vector temp; + splitAtComma(secondCol, temp); + nameMap[firstCol] = temp; + pairDone = false; + } + } } in.close(); - + return nameMap.size(); - } catch(exception& e) { errorOut(e, "MothurOut", "readNames"); @@ -1519,18 +1569,30 @@ map MothurOut::readNames(string namefile) { ifstream in; openInputFile(namefile, in); + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> firstCol; gobble(in); - in >> secondCol; gobble(in); - - int num = getNumNames(secondCol); - - nameMap[firstCol] = num; + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } } - in.close(); + in.close(); return nameMap; @@ -1549,27 +1611,41 @@ int MothurOut::readNames(string namefile, vector& nameVector, m ifstream in; openInputFile(namefile, in); + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; + while (!in.eof()) { if (control_pressed) { break; } - string firstCol, secondCol; - in >> 
firstCol >> secondCol; gobble(in); - - int num = getNumNames(secondCol); - - map::iterator it = fastamap.find(firstCol); - if (it == fastamap.end()) { - error = 1; - mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); - }else { - seqPriorityNode temp(num, it->second, firstCol); - nameVector.push_back(temp); - } + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } } - in.close(); - + in.close(); + return error; - } catch(exception& e) { errorOut(e, "MothurOut", "readNames"); diff --git a/mothurout.h b/mothurout.h index cc8bfb6..b19c05a 100644 --- a/mothurout.h +++ b/mothurout.h @@ -97,6 +97,7 @@ class MothurOut { string getline(istringstream&); void gobble(istream&); void gobble(istringstream&); + vector splitWhiteSpace(string& rest, char[], int); map readNames(string); int readNames(string, map&); int readNames(string, map >&); diff --git a/screenseqscommand.h b/screenseqscommand.h index 291d8e6..54c8fbb 100644 --- a/screenseqscommand.h +++ b/screenseqscommand.h @@ -60,7 +60,6 @@ private: vector outputNames; vector optimize; map nameMap; - int readNames(); int getSummary(vector&); int createProcessesCreateSummary(vector&, vector&, vector&, vector&, vector&, string); diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index 717b1d3..aebba6b 100644 --- a/subsamplecommand.cpp +++ 
b/subsamplecommand.cpp @@ -639,34 +639,13 @@ int SubSampleCommand::getNames() { int SubSampleCommand::readNames() { try { - ifstream in; - m->openInputFile(namefile, in); - - string thisname, repnames; - map >::iterator it; - - while(!in.eof()){ - - if (m->control_pressed) { in.close(); return 0; } - - in >> thisname; m->gobble(in); //read from first column - in >> repnames; //read from second column - - it = nameMap.find(thisname); - if (it == nameMap.end()) { - - vector splitRepNames; - m->splitAtComma(repnames, splitRepNames); - - nameMap[thisname] = splitRepNames; - for (int i = 0; i < splitRepNames.size(); i++) { names.push_back(splitRepNames[i]); } - - }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); } - - m->gobble(in); - } - in.close(); - + nameMap.clear(); + m->readNames(namefile, nameMap); + + //save names of all sequences + map >::iterator it; + for (it = nameMap.begin(); it != nameMap.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { names.push_back((it->second)[i]); } } + return 0; } diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index c019a70..6f5bb97 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -687,6 +687,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string currQual.printQScores(trimQualFile); } + if(nameFile != ""){ map::iterator itName = nameMap.find(currSeq.getName()); if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } @@ -708,11 +709,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; + int numRedundants = 0; if (nameFile != "") { map::iterator itName = nameMap.find(currSeq.getName()); if (itName != nameMap.end()) { vector thisSeqsNames; m->splitAtChar(itName->second, thisSeqsNames, ','); + numRedundants = thisSeqsNames.size()-1; //we already include ourselves below for 
(int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; } @@ -720,8 +723,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } map::iterator it = groupCounts.find(thisGroup); - if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } - else { groupCounts[it->first]++; } + if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } + else { groupCounts[it->first] += (1 + numRedundants); } } } diff --git a/trimseqscommand.h b/trimseqscommand.h index ba4e614..e280c8a 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -413,11 +413,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; + int numRedundants = 0; if (pDataArray->nameFile != "") { map::iterator itName = pDataArray->nameMap.find(currSeq.getName()); if (itName != pDataArray->nameMap.end()) { vector thisSeqsNames; pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ','); + numRedundants = thisSeqsNames.size()-1; //we already include ourselves below for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; } @@ -425,8 +427,8 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } map::iterator it = pDataArray->groupCounts.find(thisGroup); - if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; } - else { pDataArray->groupCounts[it->first]++; } + if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1 + numRedundants; } + else { pDataArray->groupCounts[it->first] += (1 + numRedundants); } } } -- 2.39.2