From: westcott Date: Wed, 3 Aug 2011 17:01:11 +0000 (+0000) Subject: added paralellization for windows to dist.seqs and summary.seqs X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=fc1ed1ae1b022719176910ab71993bd6535810ad added paralellization for windows to dist.seqs and summary.seqs --- diff --git a/classify.cpp b/classify.cpp index 1642f0b..4c6c6d8 100644 --- a/classify.cpp +++ b/classify.cpp @@ -123,7 +123,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me //create database if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); } else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); } + else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); } else if(method == "distance") { database = new DistanceDB(); } else { m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine(); diff --git a/distancecommand.cpp b/distancecommand.cpp index b69b93d..f7dff2e 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -8,11 +8,6 @@ */ #include "distancecommand.h" -#include "ignoregaps.h" -#include "eachgapdist.h" -#include "eachgapignore.h" -#include "onegapdist.h" -#include "onegapignore.h" //********************************************************************************************************************** vector DistanceCommand::setParameters(){ @@ -200,26 +195,6 @@ DistanceCommand::DistanceCommand(string option) { if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; } if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; } - - ValidCalculators validCalculator; - - if (m->isTrue(countends) == true) { - for (int i=0; icontrol_pressed) { outputTypes.clear(); delete distCalculator; m->mothurRemove(outputFile); return 0; } + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &pid); @@ -492,9 +467,7 @@ int DistanceCommand::execute(){ } #endif - if (m->control_pressed) { outputTypes.clear(); delete distCalculator; m->mothurRemove(outputFile); return 0; } - - delete distCalculator; + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } //set phylip file as new current phylipfile string current = ""; @@ -567,25 +540,80 @@ void DistanceCommand::createProcesses(string filename) { int temp = processIDS[i]; wait(&temp); } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the distanceData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //that's why the distance calculator was moved inside of the driver to make separate copies. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; //[processors-1]; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor-1 worker threads. + for( int i=0; iappendFiles((filename + toString(processIDS[i]) + ".temp"), filename); m->mothurRemove((filename + toString(processIDS[i]) + ".temp")); } -#endif + } catch(exception& e) { m->errorOut(e, "DistanceCommand", "createProcesses"); exit(1); } } - /**************************************************************************************************/ /////// need to fix to work with calcs and sequencedb int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){ try { - + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { outFile.close(); return 0; } + if (m->control_pressed) { delete distCalculator; outFile.close(); return 0; } //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop @@ -630,6 +658,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); outFile.close(); + delete distCalculator; return 1; } @@ -642,7 +671,26 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float /////// need to fix to work with calcs and sequencedb int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){ try { - + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { outFile.close(); return 0; } + if (m->control_pressed) { delete distCalculator; outFile.close(); return 0; } distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); @@ -680,6 +728,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); outFile.close(); + delete distCalculator; return 1; } @@ -693,6 +742,28 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string /////// need to fix to work with calcs and sequencedb int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){ try { + + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { return 0; } + if (m->control_pressed) { delete distCalculator; return 0; } //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop @@ -735,7 +806,8 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + delete distCalculator; return 1; } catch(exception& e) { @@ -747,6 +819,27 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo /////// need to fix to work with calcs and sequencedb int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){ try { + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { return 0; } + if (m->control_pressed) { delete distCalculator; return 0; } distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); @@ -809,6 +902,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; MPI_File_close(&outMPI); + delete distCalculator; return 1; } @@ -821,6 +915,26 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned /////// need to fix to work with calcs and sequencedb int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){ try { + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { return 0; } + if (m->control_pressed) { delete distCalculator; return 0; } distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); @@ -883,7 +997,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; MPI_File_close(&outMPI); - + delete distCalculator; return 1; } catch(exception& e) { diff --git a/distancecommand.h b/distancecommand.h index bac5d14..9d5477d 100644 --- a/distancecommand.h +++ b/distancecommand.h @@ -15,8 +15,159 @@ #include "validcalculator.h" #include "dist.h" #include "sequencedb.h" +#include "ignoregaps.h" +#include "eachgapdist.h" +#include "eachgapignore.h" +#include "onegapdist.h" +#include "onegapignore.h" +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +typedef struct distanceData { + int startLine; + int endLine; + string dFileName; + float cutoff; + SequenceDB alignDB; + vector Estimators; + MothurOut* m; + string output; + int numNewFasta; + string countends; + + distanceData(){} + distanceData(int s, int e, string dbname, float c, SequenceDB db, vector Est, MothurOut* mout, string o, int num, string count) { + startLine = s; + endLine = e; + dFileName = dbname; + cutoff = c; + alignDB = db; + Estimators = Est; + m = mout; + output = o; + numNewFasta = num; + countends = count; + + } +}; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){ + distanceData* pDataArray; + pDataArray = (distanceData*)lpParam; + + try { + ValidCalculators validCalculator; + Dist* distCalculator; + if (pDataArray->m->isTrue(pDataArray->countends) == true) { + for (int i=0; iEstimators.size(); i++) { + if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { + if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); } + else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapDist(); } + } + } + }else { + for (int i=0; iEstimators.size(); i++) { + if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { + if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + } + + int startTime = time(NULL); + + //column file + ofstream outFile(pDataArray->dFileName.c_str(), ios::trunc); + outFile.setf(ios::fixed, ios::showpoint); + outFile << setprecision(4); + + + if (pDataArray->output != "square") { + if((pDataArray->output == "lt") && (pDataArray->startLine == 0)){ outFile << pDataArray->alignDB.getNumSeqs() << endl; } + + for(int i=pDataArray->startLine;iendLine;i++){ + if(pDataArray->output == "lt") { + string name = pDataArray->alignDB.get(i).getName(); + if (name.length() < 10) { //pad with spaces to make compatible + while (name.length() < 10) { name += " "; } + } + outFile << name << '\t'; + } + for(int j=0;jm->control_pressed) { delete distCalculator; outFile.close(); return 0; } + + //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file + //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop + if ((i >= pDataArray->numNewFasta) && (j >= pDataArray->numNewFasta)) { break; } + + distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j)); + double dist = distCalculator->getDist(); + + if(dist <= pDataArray->cutoff){ + if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; } + } + if (pDataArray->output == "lt") { outFile << dist << '\t'; } + } + + if (pDataArray->output == "lt") { outFile << endl; } + + if(i % 100 == 0){ + pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + } + + } + pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + }else{ + if(pDataArray->startLine == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; } + + for(int i=pDataArray->startLine;iendLine;i++){ + + string name = pDataArray->alignDB.get(i).getName(); + //pad with spaces to make compatible + if (name.length() < 10) { while (name.length() < 10) { name += " "; } } + + outFile << name << '\t'; + + for(int j=0;jalignDB.getNumSeqs();j++){ + + if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0; } + + distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j)); + double dist = distCalculator->getDist(); + + outFile << dist << '\t'; + } + + outFile << endl; + + if(i % 100 == 0){ + pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + } + + } + pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + } + + outFile.close(); + delete distCalculator; + + return 0; + } + catch(exception& e) { + pDataArray->m->errorOut(e, "DistanceCommand", "MyDistThreadFunction"); + exit(1); + } +} +#endif + +/**************************************************************************************************/ class DistanceCommand : public Command { public: @@ -42,7 +193,7 @@ private: }; - Dist* distCalculator; + //Dist* distCalculator; SequenceDB alignDB; string countends, output, fastafile, calc, outputDir, oldfastafile, column, compress; @@ -74,5 +225,7 @@ private: #endif +/**************************************************************************************************/ + diff --git a/metastatscommand.cpp b/metastatscommand.cpp index 7942bfc..1aaa41e 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -393,7 +393,7 @@ int MetaStatsCommand::driver(int start, int num, vector& th //get set names string setA = namesOfGroupCombos[c][0]; string setB = namesOfGroupCombos[c][1]; - + //get filename string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats"; outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); @@ -423,7 +423,9 @@ int MetaStatsCommand::driver(int start, int num, vector& th setACount++; } } - + + //for (int i = 0; i < subset.size(); i++) { cout << subset[i]->getGroup() << endl; } + if ((setACount == 0) || (setBCount == 0)) { m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); outputNames.pop_back(); diff --git a/mothur.h b/mothur.h index 53074eb..f8ec323 100644 --- a/mothur.h +++ b/mothur.h @@ -72,6 +72,8 @@ #include #include #include + #include + #endif using namespace std; diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index 2fbf6d1..ee88353 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -8,7 +8,7 @@ */ #include "seqsummarycommand.h" -#include "sequence.hpp" + //********************************************************************************************************************** vector SeqSummaryCommand::setParameters(){ @@ -281,31 +281,30 @@ int SeqSummaryCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - vector positions = m->divideFile(fastafile, processors); - - for (int i = 0; i < (positions.size()-1); i++) { - lines.push_back(new linePair(positions[i], positions[(i+1)])); - } - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]); - }else{ - numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); - - rename((summaryFile + toString(processIDS[0]) + ".temp").c_str(), summaryFile.c_str()); - //append files - for(int i=1;iappendFiles((summaryFile + toString(processIDS[i]) + ".temp"), summaryFile); - m->mothurRemove((summaryFile + toString(processIDS[i]) + ".temp")); - } + vector positions; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + positions = m->divideFile(fastafile, processors); + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } + #else + positions = m->setFilePosFasta(fastafile, numSeqs); + + //figure out how many sequences you have to process + int numSeqsPerProcessor = numSeqs / processors; + for (int i = 0; i < processors; i++) { + int startIndex = i * numSeqsPerProcessor; + if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } + lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } - - if (m->control_pressed) { return 0; } - #else + #endif + + + if(processors == 1){ numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]); - if (m->control_pressed) { return 0; } - #endif + }else{ + numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); + } + + if (m->control_pressed) { return 0; } #endif #ifdef USE_MPI @@ -507,11 +506,12 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector& startPo /**************************************************************************************************/ int SeqSummaryCommand::createProcessesCreateSummary(vector& startPosition, vector& endPosition, vector& seqLength, vector& ambigBases, vector& longHomoPolymer, string filename, string sumFile) { try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; + int process = 1; int num = 0; processIDS.clear(); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -545,8 +545,11 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector& startPosition, } } + //do your part + num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile, lines[0]); + //force parent to wait until all the processes are done - for (int i=0;i& startPosition, in.close(); m->mothurRemove(tempFilename); + + m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile); + m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp")); } - return num; +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the seqSumData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to allow both threads to add info to vectors. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors]; + HANDLE hThreadArray[processors]; + + //Create processor worker threads. + for( int i=0; istart << '\t' << lines[i]->end << endl; + // Allocate memory for thread data. + seqSumData* tempSum = new seqSumData(&startPosition, &endPosition, &seqLength, &ambigBases, &longHomoPolymer, filename, (sumFile + toString(i) + ".temp"), m, lines[i]->start, lines[i]->end, namefile, nameMap); + pDataArray.push_back(tempSum); + processIDS.push_back(i); + + //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); + } + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + + //rename((sumFile + toString(processIDS[0]) + ".temp").c_str(), sumFile.c_str()); + //append files + for(int i=0;iappendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile); + m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp")); + } #endif + return num; } catch(exception& e) { m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary"); diff --git a/seqsummarycommand.h b/seqsummarycommand.h index c0d3687..a7ccb57 100644 --- a/seqsummarycommand.h +++ b/seqsummarycommand.h @@ -12,6 +12,9 @@ #include "mothur.h" #include "command.hpp" +#include "sequence.hpp" + +/**************************************************************************************************/ class SeqSummaryCommand : public Command { public: @@ -54,4 +57,118 @@ private: }; +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +typedef struct seqSumData { + vector* startPosition; + vector* endPosition; + vector* seqLength; + vector* ambigBases; + vector* longHomoPolymer; + string filename; + string sumFile; + unsigned long int start; + unsigned long int end; + int count; + MothurOut* m; + string namefile; + map nameMap; + + + seqSumData(){} + seqSumData(vector* s, vector* e, vector* l, vector* a, vector* h, string f, string sf, MothurOut* mout, unsigned long int st, unsigned long int en, string na, map nam) { + startPosition = s; + endPosition = e; + seqLength = l; + ambigBases = a; + longHomoPolymer = h; + filename = f; + sumFile = sf; + m = mout; + start = st; + end = en; + namefile = na; + nameMap = nam; + count = 0; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){ + seqSumData* pDataArray; + pDataArray = (seqSumData*)lpParam; + + try { + ofstream outSummary; + pDataArray->m->openOutputFile(pDataArray->sumFile, outSummary); + + ifstream in; + pDataArray->m->openInputFile(pDataArray->filename, in); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl; + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + pDataArray->count = pDataArray->end; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; } + + Sequence current(in); pDataArray->m->gobble(in); + + if (current.getName() != "") { + + int num = 1; + if (pDataArray->namefile != "") { + //make sure this sequence is in the namefile, else error + map::iterator it = pDataArray->nameMap.find(current.getName()); + + if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } + else { num = it->second; } + } + + //for each sequence this sequence represents + for (int i = 0; i < num; i++) { + pDataArray->startPosition->push_back(current.getStartPos()); + pDataArray->endPosition->push_back(current.getEndPos()); + pDataArray->seqLength->push_back(current.getNumBases()); + pDataArray->ambigBases->push_back(current.getAmbigBases()); + pDataArray->longHomoPolymer->push_back(current.getLongHomoPolymer()); + } + + outSummary << current.getName() << '\t'; + outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t'; + outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t'; + outSummary << current.getLongHomoPolymer() << '\t' << num << endl; + } + } + + in.close(); + outSummary.close(); + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "SeqSummaryCommand", "MySeqSumThreadFunction"); + exit(1); + } +} #endif + + + + +#endif + +/**************************************************************************************************/ + +