From: westcott Date: Tue, 6 Dec 2011 12:53:59 +0000 (+0000) Subject: metastats in progress X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=7f0cae4f4853cc3f12bc751ee06ea31c7c97496e metastats in progress --- diff --git a/chimeraccodecommand.cpp b/chimeraccodecommand.cpp index 321ae77..7468011 100644 --- a/chimeraccodecommand.cpp +++ b/chimeraccodecommand.cpp @@ -390,14 +390,16 @@ int ChimeraCcodeCommand::execute(){ outHeader.close(); - vector positions = m->divideFile(fastaFileNames[s], processors); - - for (int i = 0; i < (positions.size()-1); i++) { - lines.push_back(new linePair(positions[i], positions[(i+1)])); - } + //break up file #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + vector positions = m->divideFile(fastaFileNames[s], processors); + + for (int i = 0; i < (positions.size()-1); i++) { + lines.push_back(new linePair(positions[i], positions[(i+1)])); + } + if(processors == 1){ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); @@ -436,6 +438,7 @@ int ChimeraCcodeCommand::execute(){ } #else + lines.push_back(new linePair(0, 1000)); numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); if (m->control_pressed) { m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; } diff --git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp index 3a530fd..400a715 100644 --- a/chimeracheckcommand.cpp +++ b/chimeracheckcommand.cpp @@ -424,14 +424,16 @@ int ChimeraCheckCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - vector positions = m->divideFile(fastaFileNames[i], processors); - - for (int s = 0; s < (positions.size()-1); s++) { - lines.push_back(new linePair(positions[s], positions[(s+1)])); - } + //break up file #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + vector positions = m->divideFile(fastaFileNames[i], processors); + + for (int s = 0; s < (positions.size()-1); s++) { + lines.push_back(new linePair(positions[s], positions[(s+1)])); + } + if(processors == 1){ numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]); @@ -459,6 +461,7 @@ int ChimeraCheckCommand::execute(){ } #else + lines.push_back(new linePair(0, 1000)); numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]); if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; } diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index 76dd0da..0627793 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -486,14 +486,15 @@ int ChimeraPintailCommand::execute(){ MPI_File_close(&outMPIAccnos); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - vector positions = m->divideFile(fastaFileNames[s], processors); - - for (int i = 0; i < (positions.size()-1); i++) { - lines.push_back(new linePair(positions[i], positions[(i+1)])); - } - + //break up file #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + vector positions = m->divideFile(fastaFileNames[s], processors); + + for (int i = 0; i < (positions.size()-1); i++) { + lines.push_back(new linePair(positions[i], positions[(i+1)])); + } + if(processors == 1){ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); @@ -531,6 +532,7 @@ int ChimeraPintailCommand::execute(){ } #else + lines.push_back(new linePair(0, 1000)); numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } diff --git a/chopseqscommand.cpp b/chopseqscommand.cpp index e2b0fa4..abfd8dc 100644 --- a/chopseqscommand.cpp +++ b/chopseqscommand.cpp @@ -241,7 +241,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) { } if (stopSpot == 0) { temp = ""; } - else { temp = temp.substr(0, stopSpot); } + else { temp = temp.substr(0, stopSpot+1); } }else { if (!Short) { temp = ""; } //sequence too short @@ -294,7 +294,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) { } if (stopSpot == 0) { temp = ""; } - else { temp = temp.substr(0, stopSpot); } + else { temp = temp.substr(0, stopSpot+1); } }else { if (!Short) { temp = ""; } //sequence too short @@ -320,7 +320,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) { } if (stopSpot == 0) { temp = ""; } - else { temp = temp.substr(stopSpot+1); } + else { temp = temp.substr(stopSpot); } }else { if (!Short) { temp = ""; } //sequence too short } diff --git a/engine.cpp b/engine.cpp index a80eba4..670af78 100644 --- a/engine.cpp +++ b/engine.cpp @@ -406,7 +406,7 @@ bool BatchEngine::getInput(){ /***********************************************************************/ string BatchEngine::getNextCommand(ifstream& inputBatchFile) { try { - + string nextcommand = ""; if (inputBatchFile.eof()) { nextcommand = "quit()"; } diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index e96b7fd..d17f6a3 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -420,12 +420,14 @@ int FilterSeqsCommand::filterSequences() { MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) vector positions = m->divideFile(fastafileNames[s], processors); - + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; @@ -444,6 +446,7 @@ int FilterSeqsCommand::filterSequences() { if (m->control_pressed) { return 1; } #else + lines.push_back(new linePair(0, 1000)); int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; @@ -736,12 +739,14 @@ string FilterSeqsCommand::createFilter() { MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - vector positions = m->divideFile(fastafileNames[s], processors); - for (int i = 0; i < (positions.size()-1); i++) { - lines.push_back(new linePair(positions[i], positions[(i+1)])); - } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + vector positions = m->divideFile(fastafileNames[s], processors); + for (int i = 0; i < (positions.size()-1); i++) { + lines.push_back(new linePair(positions[i], positions[(i+1)])); + } + if(processors == 1){ int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; @@ -752,6 +757,7 @@ string FilterSeqsCommand::createFilter() { if (m->control_pressed) { return filterString; } #else + lines.push_back(new linePair(0, 1000)); int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; if (m->control_pressed) { return filterString; } diff --git a/getgroupcommand.cpp b/getgroupcommand.cpp index 194e8e1..8dae3f3 100644 --- a/getgroupcommand.cpp +++ b/getgroupcommand.cpp @@ -8,6 +8,7 @@ */ #include "getgroupcommand.h" +#include "inputdata.h" //********************************************************************************************************************** vector GetgroupCommand::setParameters(){ @@ -121,55 +122,20 @@ int GetgroupCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } - - //open shared file - m->openInputFile(sharedfile, in); //open output file outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "bootGroups"; m->openOutputFile(outputFile, out); - - int num, inputData, count; - count = 0; - string holdLabel, nextLabel, groupN, label; - - //read in first row since you know there is at least 1 group. - in >> label >> groupN >> num; - holdLabel = label; - - //output first group - m->mothurOut(groupN); m->mothurOutEndLine(); - out << groupN << '\t' << groupN << endl; - - //get rest of line - for(int i=0;i> inputData; - } - if (m->control_pressed) { outputTypes.clear(); in.close(); out.close(); m->mothurRemove(outputFile); return 0; } - - if (in.eof() != true) { in >> nextLabel; } + InputData input(sharedfile, "sharedfile"); + vector lookup = input.getSharedRAbundVectors(); - //read the rest of the groups info in - while ((nextLabel == holdLabel) && (in.eof() != true)) { - if (m->control_pressed) { outputTypes.clear(); in.close(); out.close(); m->mothurRemove(outputFile); return 0; } - - in >> groupN >> num; - count++; - - //output next group - m->mothurOut(groupN); m->mothurOutEndLine(); - out << groupN << '\t' << groupN << endl; - - //fill vector. - for(int i=0;i> inputData; - } - - if (in.eof() != true) { in >> nextLabel; } + for (int i = 0; i < lookup.size(); i++) { + out << lookup[i]->getGroup() << '\t' << lookup[i]->getGroup() << endl; + m->mothurOut(lookup[i]->getGroup()); m->mothurOutEndLine(); + delete lookup[i]; } - in.close(); out.close(); if (m->control_pressed) { m->mothurRemove(outputFile); return 0; } diff --git a/mothurmetastats.cpp b/mothurmetastats.cpp index 31542e8..a4f879e 100644 --- a/mothurmetastats.cpp +++ b/mothurmetastats.cpp @@ -512,6 +512,7 @@ vector MothurMetastats::calc_qvalues(vector& pValues) { } } + //from R code - replacing with spline and splint below //vector f_spline = smoothSpline(lambdas, pi0_hat, 3); //double pi0 = f_spline[(f_spline.size()-1)]; // this is the essential pi0_hat value diff --git a/mothurmetastats.h b/mothurmetastats.h index 6c1f2f8..229ffb6 100644 --- a/mothurmetastats.h +++ b/mothurmetastats.h @@ -32,6 +32,18 @@ class MothurMetastats { int permute_matrix(vector&, vector&, int, vector&, vector&, vector&); int permute_array(vector&); int calc_twosample_ts(vector&, int, vector&, vector&, vector&); + vector smoothSpline(vector, vector, int); + vector calc_qvalues(vector&); + vector sknot1(vector); + int nkn(int); + int OrderPValues(int, int, vector&, vector&); + int swapElements(int, int, vector&, vector&); + vector getSequence(int, int, int); + + int spline(vector&, vector&, int, int, vector&); + int splint(vector&, vector&, double&, double&, vector&); + + }; #endif diff --git a/mothurout.cpp b/mothurout.cpp index bc5e24e..001e8c4 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1182,7 +1182,6 @@ vector MothurOut::setFilePosEachLine(string filename, int& n vector MothurOut::divideFile(string filename, int& proc) { try{ - vector filePos; filePos.push_back(0); @@ -1190,7 +1189,7 @@ vector MothurOut::divideFile(string filename, int& proc) { unsigned long long size; filename = getFullPathName(filename); - + //get num bytes in file pFile = fopen (filename.c_str(),"rb"); if (pFile==NULL) perror ("Error opening file"); @@ -1199,7 +1198,9 @@ vector MothurOut::divideFile(string filename, int& proc) { size=ftell (pFile); fclose (pFile); } - + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //estimate file breaks unsigned long long chunkSize = 0; chunkSize = size / proc; @@ -1219,7 +1220,10 @@ vector MothurOut::divideFile(string filename, int& proc) { unsigned long long newSpot = spot; while (!in.eof()) { char c = in.get(); + if (c == '>') { in.putback(c); newSpot = in.tellg(); break; } + else if (int(c) == -1) { break; } + } //there was not another sequence before the end of the file @@ -1240,7 +1244,11 @@ vector MothurOut::divideFile(string filename, int& proc) { } proc = (filePos.size() - 1); - +#else + mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine(); + proc=1; + filePos.push_back(size); +#endif return filePos; } catch(exception& e) { diff --git a/preclustercommand.cpp b/preclustercommand.cpp index 0810a02..cf42568 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -60,6 +60,7 @@ PreClusterCommand::PreClusterCommand(){ vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; + outputTypes["map"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "PreClusterCommand"); @@ -94,6 +95,7 @@ PreClusterCommand::PreClusterCommand(string option) { vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; + outputTypes["map"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -181,11 +183,16 @@ int PreClusterCommand::execute(){ string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile)); string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile); string newNamesFile = fileroot + "precluster.names"; + string newMapFile = fileroot + "precluster.map"; //add group name if by group + outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile); + outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile); + if (bygroup) { //clear out old files ofstream outFasta; m->openOutputFile(newFastaFile, outFasta); outFasta.close(); ofstream outNames; m->openOutputFile(newNamesFile, outNames); outNames.close(); + newMapFile = fileroot + "precluster."; //parse fasta and name file by group SequenceParser* parser; @@ -194,16 +201,12 @@ int PreClusterCommand::execute(){ vector groups = parser->getNamesOfGroups(); -//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1) { driverGroups(parser, newFastaFile, newNamesFile, 0, groups.size(), groups); } - else { createProcessesGroups(parser, newFastaFile, newNamesFile, groups); } -//#else -// driverGroups(parser, newFastaFile, newNamesFile, 0, groups.size(), groups); -//#endif + if(processors == 1) { driverGroups(parser, newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups); } + else { createProcessesGroups(parser, newFastaFile, newNamesFile, newMapFile, groups); } delete parser; - if (m->control_pressed) { m->mothurRemove(newFastaFile); m->mothurRemove(newNamesFile); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //run unique.seqs for deconvolute results string inputString = "fasta=" + newFastaFile; @@ -231,14 +234,15 @@ int PreClusterCommand::execute(){ //reads fasta file and return number of seqs int numSeqs = readFASTA(); //fills alignSeqs and makes all seqs active - if (m->control_pressed) { return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); return 0; } if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); return 0; } - int count = process(); + int count = process(newMapFile); + outputNames.push_back(newMapFile); outputTypes["map"].push_back(newMapFile); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine(); m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -246,13 +250,12 @@ int PreClusterCommand::execute(){ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } - - if (m->control_pressed) { m->mothurRemove(newFastaFile); m->mothurRemove(newNamesFile); return 0; } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(newFastaFile); m->mothurOutEndLine(); outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile); - m->mothurOut(newNamesFile); m->mothurOutEndLine(); outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile @@ -276,7 +279,7 @@ int PreClusterCommand::execute(){ } } /**************************************************************************************************/ -int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newFName, string newNName, vector groups) { +int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newFName, string newNName, string newMFile, vector groups) { try { vector processIDS; @@ -306,7 +309,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); + num = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMFile, lines[process].start, lines[process].end, groups); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -316,7 +319,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF } //do my part - num = driverGroups(parser, newFName, newNName, lines[0].start, lines[0].end, groups); + num = driverGroups(parser, newFName, newNName, newMFile, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;imapFileNames.size(); j++) { + outputNames.push_back(pDataArray[i]->mapFileNames[j]); outputTypes["map"].push_back(pDataArray[i]->mapFileNames[j]); + } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } @@ -382,7 +388,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF } } /**************************************************************************************************/ -int PreClusterCommand::driverGroups(SequenceParser* parser, string newFFile, string newNFile, int start, int end, vector groups){ +int PreClusterCommand::driverGroups(SequenceParser* parser, string newFFile, string newNFile, string newMFile, int start, int end, vector groups){ try { int numSeqs = 0; @@ -407,7 +413,8 @@ int PreClusterCommand::driverGroups(SequenceParser* parser, string newFFile, str if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); m->control_pressed = true; return 0; } - int count = process(); + int count = process(newMFile+groups[i]+".map"); + outputNames.push_back(newMFile+groups[i]+".map"); outputTypes["map"].push_back(newMFile+groups[i]+".map"); if (m->control_pressed) { return 0; } @@ -427,8 +434,10 @@ int PreClusterCommand::driverGroups(SequenceParser* parser, string newFFile, str } } /**************************************************************************************************/ -int PreClusterCommand::process(){ +int PreClusterCommand::process(string newMapFile){ try { + ofstream out; + m->openOutputFile(newMapFile, out); //sort seqs by number of identical seqs sort(alignSeqs.begin(), alignSeqs.end(), comparePriority); @@ -444,10 +453,12 @@ int PreClusterCommand::process(){ if (alignSeqs[i].active) { //this sequence has not been merged yet + string chunk = alignSeqs[i].seq.getName() + "\t" + toString(alignSeqs[i].numIdentical) + "\t" + toString(0) + "\t" + alignSeqs[i].seq.getAligned() + "\n"; + //try to merge it with all smaller seqs for (int j = i+1; j < numSeqs; j++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { out.close(); return 0; } if (alignSeqs[j].active) { //this sequence has not been merged yet //are you within "diff" bases @@ -458,19 +469,24 @@ int PreClusterCommand::process(){ alignSeqs[i].names += ',' + alignSeqs[j].names; alignSeqs[i].numIdentical += alignSeqs[j].numIdentical; + chunk += alignSeqs[j].seq.getName() + "\t" + toString(alignSeqs[j].numIdentical) + "\t" + toString(mismatch) + "\t" + alignSeqs[j].seq.getAligned() + "\n"; + alignSeqs[j].active = 0; alignSeqs[j].numIdentical = 0; count++; } }//end if j active - }//end if i != j + }//end for loop j //remove from active list alignSeqs[i].active = 0; + out << "ideal_seq_" << (i+1) << '\t' << alignSeqs[i].numIdentical << endl << chunk << endl;; + }//end if active i if(i % 100 == 0) { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); } } + out.close(); if(numSeqs % 100 != 0) { m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); } @@ -620,7 +636,7 @@ void PreClusterCommand::printData(string newfasta, string newname){ m->openOutputFile(newfasta, outFasta); m->openOutputFile(newname, outNames); } - + for (int i = 0; i < alignSeqs.size(); i++) { if (alignSeqs[i].numIdentical != 0) { alignSeqs[i].seq.printSequence(outFasta); diff --git a/preclustercommand.h b/preclustercommand.h index 6f10618..3712302 100644 --- a/preclustercommand.h +++ b/preclustercommand.h @@ -22,8 +22,9 @@ struct seqPNode { Sequence seq; string names; bool active; + int diffs; seqPNode() {} - seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1) {} + seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1) { diffs = 0; } ~seqPNode() {} }; /************************************************************/ @@ -72,10 +73,10 @@ private: //int readNamesFASTA(); int calcMisMatches(string, string); void printData(string, string); //fasta filename, names file name - int process(); + int process(string); int loadSeqs(map&, vector&); - int driverGroups(SequenceParser*, string, string, int, int, vector groups); - int createProcessesGroups(SequenceParser*, string, string, vector); + int driverGroups(SequenceParser*, string, string, string, int, int, vector groups); + int createProcessesGroups(SequenceParser*, string, string, string, vector); }; /**************************************************************************************************/ @@ -86,20 +87,22 @@ struct preClusterData { string fastafile; string namefile; string groupfile; - string newFName, newNName; + string newFName, newNName, newMName; MothurOut* m; int start; int end; int diffs, threadID; vector groups; + vector mapFileNames; preClusterData(){} - preClusterData(string f, string n, string g, string nff, string nnf, vector gr, MothurOut* mout, int st, int en, int d, int tid) { + preClusterData(string f, string n, string g, string nff, string nnf, string nmf, vector gr, MothurOut* mout, int st, int en, int d, int tid) { fastafile = f; namefile = n; groupfile = g; newFName = nff; newNName = nnf; + newMName = nmf; m = mout; start = st; end = en; @@ -193,6 +196,10 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){ //////////////////////////////////////////////////// //int count = process(); - same function below + ofstream out; + pDataArray->m->openOutputFile(pDataArray->newMName+pDataArray->groups[k]+".map", out); + pDataArray->mapFileNames.push_back(pDataArray->newMName+pDataArray->groups[k]+".map"); + //sort seqs by number of identical seqs sort(alignSeqs.begin(), alignSeqs.end(), comparePriority); @@ -206,6 +213,8 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){ if (alignSeqs[i].active) { //this sequence has not been merged yet + string chunk = alignSeqs[i].seq.getName() + "\t" + toString(alignSeqs[i].numIdentical) + "\t" + toString(0) + "\t" + alignSeqs[i].seq.getAligned() + "\n"; + //try to merge it with all smaller seqs for (int j = i+1; j < numSeqs; j++) { @@ -229,18 +238,22 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){ alignSeqs[j].active = 0; alignSeqs[j].numIdentical = 0; + alignSeqs[j].diffs = mismatch; count++; + chunk += alignSeqs[j].seq.getName() + "\t" + toString(alignSeqs[j].numIdentical) + "\t" + toString(mismatch) + "\t" + alignSeqs[j].seq.getAligned() + "\n"; } }//end if j active - }//end if i != j + }//end for loop j //remove from active list alignSeqs[i].active = 0; + out << "ideal_seq_" << (i+1) << '\t' << alignSeqs[i].numIdentical << endl << chunk << endl; + }//end if active i if(i % 100 == 0) { pDataArray->m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); pDataArray->m->mothurOutEndLine(); } } - + out.close(); if(numSeqs % 100 != 0) { pDataArray->m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); pDataArray->m->mothurOutEndLine(); } //////////////////////////////////////////////////// diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 9a6998e..ec2d8e0 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -283,10 +283,15 @@ int ScreenSeqsCommand::execute(){ getSummary(positions); } else { + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) positions = m->divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); - } + } + #else + positions.push_back(0); positions.push_back(1000); + lines.push_back(new linePair(0, 1000)); + #endif } string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "good" + m->getExtension(fastafile); @@ -659,12 +664,15 @@ int ScreenSeqsCommand::getSummary(vector& positions){ vector ambigBases; vector longHomoPolymer; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) vector positions = m->divideFile(fastafile, processors); - + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } - +#else + lines.push_back(new linePair(0, 1000)); +#endif #ifdef USE_MPI int pid; @@ -778,7 +786,7 @@ int ScreenSeqsCommand::driverCreateSummary(vector& startPosition, vectormothurOut("Optimizing sequence: " + toString(count)); m->mothurOutEndLine(); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= filePos->end)) { break; } diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 1cbb91a..89ea3f9 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -203,8 +203,8 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { // ...at some point should added some additional type checking... string temp; temp = validParameter.validFile(parameters, "flip", false); - if (temp == "not found"){ flip = 0; } - else if(m->isTrue(temp)) { flip = 1; } + if (temp == "not found") { flip = 0; } + else { flip = m->isTrue(temp); } temp = validParameter.validFile(parameters, "oligos", true); if (temp == "not found"){ oligoFile = ""; }