X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=seqsummarycommand.cpp;h=c328a041267d01e21ed2e99780f18de486571685;hb=957d67f7d8bbadfd2930de061e89fd9b149270fd;hp=be75e4fac4ab60697b0dd84de6eeb275d231a5b3;hpb=ae57e166b2ed7b475ec3f466106bd76fabadd063;p=mothur.git diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index be75e4f..c328a04 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -45,6 +45,26 @@ string SeqSummaryCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string SeqSummaryCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "summary") { outputFileName = "summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "SeqSummaryCommand", "getOutputFileNameTag"); + exit(1); + } +} //********************************************************************************************************************** SeqSummaryCommand::SeqSummaryCommand(){ @@ -131,9 +151,13 @@ SeqSummaryCommand::SeqSummaryCommand(string option) { string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); - - + m->mothurConvert(temp, processors); + + if (namefile == "") { + vector files; files.push_back(fastafile); + parser.getNameFile(files); + } + } } catch(exception& e) { @@ -151,7 +175,7 @@ int SeqSummaryCommand::execute(){ //set current fasta to fastafile m->setFastaFile(fastafile); - string summaryFile = 
outputDir + m->getSimpleName(fastafile) + ".summary"; + string summaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("summary"); int numSeqs = 0; @@ -282,11 +306,12 @@ int SeqSummaryCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else vector positions; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } #else positions = m->setFilePosFasta(fastafile, numSeqs); + if (positions.size() < processors) { processors = positions.size(); } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; @@ -317,6 +342,19 @@ int SeqSummaryCommand::execute(){ sort(ambigBases.begin(), ambigBases.end()); sort(longHomoPolymer.begin(), longHomoPolymer.end()); int size = startPosition.size(); + + //find means + float meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer; + meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0; + for (int i = 0; i < size; i++) { + meanStartPosition += startPosition[i]; + meanEndPosition += endPosition[i]; + meanSeqLength += seqLength[i]; + meanAmbigBases += ambigBases[i]; + meanLongHomoPolymer += longHomoPolymer[i]; + } + //size is cast to float, so this is a floating-point divide and each mean keeps its fractional part + meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size; int ptile0_25 = int(size * 0.025); int ptile25 = int(size * 0.250); @@ -340,6 +378,8 @@ int SeqSummaryCommand::execute(){ m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + 
toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75]) + "\t" + toString(ptile75+1)); m->mothurOutEndLine(); m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5]) + "\t" + toString(ptile97_5+1)); m->mothurOutEndLine(); m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine(); + m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine(); + if (namefile == "") { m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); } else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); } @@ -394,7 +434,7 @@ int SeqSummaryCommand::driverCreateSummary(vector& startPosition, vector::iterator it = nameMap.find(current.getName()); - if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } + if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } else { num = it->second; } } @@ -414,7 +454,7 @@ int SeqSummaryCommand::driverCreateSummary(vector& startPosition, vector= filePos->end)) { break; } #else @@ -510,7 +550,7 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector& startPosition, int num = 0; 
processIDS.clear(); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -584,34 +624,42 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector& startPosition, ////////////////////////////////////////////////////////////////////////////////////////////////////// vector pDataArray; - DWORD dwThreadIdArray[processors]; - HANDLE hThreadArray[processors]; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; //Create processor worker threads. - for( int i=0; istart << '\t' << lines[i]->end << endl; + for( int i=0; istart, lines[i]->end, namefile, nameMap); + seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, namefile, nameMap); pDataArray.push_back(tempSum); - processIDS.push_back(i); - + //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); } - + + //do your part + num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, (sumFile+toString(processors-1)+".temp"), lines[processors-1]); + processIDS.push_back(processors-1); + //Wait until all threads have terminated. - WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE); + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); //Close all thread handles and free memory allocations. 
for(int i=0; i < pDataArray.size(); i++){ num += pDataArray[i]->count; + for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) { startPosition.push_back(pDataArray[i]->startPosition[k]); } + for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) { endPosition.push_back(pDataArray[i]->endPosition[k]); } + for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) { seqLength.push_back(pDataArray[i]->seqLength[k]); } + for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) { ambigBases.push_back(pDataArray[i]->ambigBases[k]); } + for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) { longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]); } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } - - //rename((sumFile + toString(processIDS[0]) + ".temp").c_str(), sumFile.c_str()); + //append files for(int i=0;iappendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);