From: westcott Date: Mon, 28 Nov 2011 17:39:48 +0000 (+0000) Subject: added fontsize to heatmap.sim, paralellized chimera.slayer byGroup for all os's,... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=88fbc534a92cb91900e98a3288dfa1f68828b69b added fontsize to heatmap.sim, paralellized chimera.slayer byGroup for all os's, fixed mpi code in screen.seqs optimize function, fixed bellerophon mpi version, fixed set.dir command for mpi version. --- diff --git a/bellerophon.cpp b/bellerophon.cpp index 5a0a444..9dd21a4 100644 --- a/bellerophon.cpp +++ b/bellerophon.cpp @@ -274,7 +274,7 @@ int Bellerophon::getChimeras() { MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status); char* buf = new char[length]; - MPI_Recv(&buf, length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status); + MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status); string temp = buf; if (temp.length() > length) { temp = temp.substr(0, length); } @@ -293,7 +293,7 @@ int Bellerophon::getChimeras() { //played with this a bit, but it may be better to try user-defined datatypes with set string lengths?? vector MPIBestSend = getBestWindow(lines[0]); pref.clear(); - + //send your result to parent for (int i = 0; i < numSeqs; i++) { diff --git a/catchallcommand.cpp b/catchallcommand.cpp index 39cc44f..857f684 100644 --- a/catchallcommand.cpp +++ b/catchallcommand.cpp @@ -213,7 +213,7 @@ int CatchAllCommand::execute() { //for each label the user selected while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - + if(allLines == 1 || labels.count(sabund->getLabel()) == 1){ m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); @@ -221,7 +221,7 @@ int CatchAllCommand::execute() { //create catchall input file from mothur's inputfile string filename = process(sabund, inputFileNames[p]); string outputPath = m->getPathName(filename); - + //create system command string catchAllCommand = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) @@ -266,7 +266,7 @@ int CatchAllCommand::execute() { //create catchall input file from mothur's inputfile string filename = process(sabund, inputFileNames[p]); string outputPath = m->getPathName(filename); - + //create system command string catchAllCommand = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) diff --git a/chimera.cpp b/chimera.cpp index 7158416..53e7732 100644 --- a/chimera.cpp +++ b/chimera.cpp @@ -122,69 +122,109 @@ vector Chimera::readSeqs(string file) { m->mothurOut("Reading sequences from " + file + "..."); cout.flush(); - #ifdef USE_MPI + #ifdef USE_MPI int pid, processors; vector positions; int numSeqs; int tag = 2001; - - MPI_Status status; MPI_File inMPI; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - //char* inFileName = new char[file.length()]; - //memcpy(inFileName, file.c_str(), file.length()); - - char inFileName[1024]; - strcpy(inFileName, file.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - //delete inFileName; - - if (pid == 0) { + MPI_Status status; + + if (byGroup) { + char inFileName[1024]; + strcpy(inFileName, file.c_str()); + + MPI_File_open(MPI_COMM_SELF, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs - - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + + //read file + for(int i=0;icontrol_pressed) { MPI_File_close(&inMPI); return container; } + + //read next sequence + int seqlength = positions[i+1] - positions[i]; + char* buf4 = new char[seqlength]; + + MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status); + + string tempBuf = buf4; + if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); } + delete buf4; + + istringstream iss (tempBuf,istringstream::in); + + Sequence* current = new Sequence(iss); + if (current->getName() != "") { + if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length + else if (length != current->getAligned().length()) { unaligned = true; } + + container.push_back(current); + if (rdb->save) { rdb->referenceSeqs.push_back(*current); } + } } - }else{ - MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - positions.resize(numSeqs+1); - MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); - } - - //read file - for(int i=0;icontrol_pressed) { MPI_File_close(&inMPI); return container; } - - //read next sequence - int seqlength = positions[i+1] - positions[i]; - char* buf4 = new char[seqlength]; - - MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status); - string tempBuf = buf4; - if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); } - delete buf4; - - istringstream iss (tempBuf,istringstream::in); - - Sequence* current = new Sequence(iss); - if (current->getName() != "") { - if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length - else if (length != current->getAligned().length()) { unaligned = true; } - - container.push_back(current); - if (rdb->save) { rdb->referenceSeqs.push_back(*current); } + MPI_File_close(&inMPI); + + }else { + + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); + + //char* inFileName = new char[file.length()]; + //memcpy(inFileName, file.c_str(), file.length()); + + char inFileName[1024]; + strcpy(inFileName, file.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + //delete inFileName; + + if (pid == 0) { + positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } + }else{ + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + positions.resize(numSeqs+1); + MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + } + + //read file + for(int i=0;icontrol_pressed) { MPI_File_close(&inMPI); return container; } + + //read next sequence + int seqlength = positions[i+1] - positions[i]; + char* buf4 = new char[seqlength]; + + MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status); + + string tempBuf = buf4; + if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); } + delete buf4; + + istringstream iss (tempBuf,istringstream::in); + + Sequence* current = new Sequence(iss); + if (current->getName() != "") { + if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length + else if (length != current->getAligned().length()) { unaligned = true; } + + container.push_back(current); + if (rdb->save) { rdb->referenceSeqs.push_back(*current); } + } } + + MPI_File_close(&inMPI); + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case } - - MPI_File_close(&inMPI); - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else ifstream in; @@ -198,7 +238,7 @@ vector Chimera::readSeqs(string file) { Sequence* current = new Sequence(in); m->gobble(in); if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length - else if (length != current->getAligned().length()) { unaligned = true; } + else if (length != current->getAligned().length()) { unaligned = true; } if (current->getName() != "") { container.push_back(current); diff --git a/chimera.h b/chimera.h index 1df4d6b..bb4c29c 100644 --- a/chimera.h +++ b/chimera.h @@ -146,7 +146,7 @@ class Chimera { public: - Chimera(){ m = MothurOut::getInstance(); length = 0; unaligned = false; } + Chimera(){ m = MothurOut::getInstance(); length = 0; unaligned = false; byGroup = false; } virtual ~Chimera(){ for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } for (int i = 0; i < filteredTemplateSeqs.size(); i++) { delete filteredTemplateSeqs[i]; } }; virtual bool getUnaligned() { return unaligned; } virtual int getLength() { return length; } @@ -174,7 +174,7 @@ class Chimera { vector templateSeqs; vector filteredTemplateSeqs; - bool filter, unaligned; + bool filter, unaligned, byGroup; int length; string seqMask, filterString, outputDir, templateFileName; Sequence* getSequence(string); //find sequence from name diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 458e89d..9ceba52 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -46,6 +46,56 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num } } //*************************************************************************************************************** +//template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, + int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() { + try { + byGroup = bg; + fastafile = file; templateSeqs = readSeqs(fastafile); + templateFileName = temp; + searchMethod = mode; + kmerSize = k; + match = ms; + misMatch = mms; + window = win; + divR = div; + minSim = minsim; + minCov = mincov; + minBS = minbs; + minSNP = minsnp; + parents = par; + iters = it; + increment = inc; + numWanted = numw; + realign = r; + trimChimera = trim; + priority = prior; + numNoParents = 0; + blastlocation = blas; + threadID = tid; + + + createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + if (searchMethod == "distance") { + //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { break; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); + exit(1); + } +} +//*************************************************************************************************************** //template=self ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() { @@ -73,6 +123,7 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map= minBS) || (chimeraResults[0].bsb >= minBS)) { - m->mothurOut(toString(threadID) +"\t"+ querySeq.getName() + "\tyes"); m->mothurOutEndLine(); + m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine(); outAcc << querySeq.getName() << endl; if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } diff --git a/chimeraslayer.h b/chimeraslayer.h index c2815a1..c409c50 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -26,6 +26,7 @@ class ChimeraSlayer : public Chimera { public: ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int); ChimeraSlayer(string, string, bool, map&, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int); + ChimeraSlayer(string, string, bool, map&, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int, bool); ~ChimeraSlayer(); diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index f15581f..0512be1 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -535,7 +535,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { //until we resolve the issue 10-18-11 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) #else - processors=1; + //processors=1; #endif } } @@ -549,16 +549,25 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { int ChimeraSlayerCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } - + for (int s = 0; s < fastaFileNames.size(); s++) { m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); int start = time(NULL); + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it + string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera"; + string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos"; + string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta"; - //you provided a groupfile - string groupFile = ""; - if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; } + //clears files + ofstream out, out1, out2; + m->openOutputFile(outputFileName, out); out.close(); + m->openOutputFile(accnosFileName, out1); out1.close(); + if (trim) { m->openOutputFile(trimFastaFileName, out2); out2.close(); } + outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); + outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); + if (trim) { outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); } //maps a filename to priority map. //if no groupfile this is fastafileNames[s] -> prioirity @@ -569,143 +578,69 @@ int ChimeraSlayerCommand::execute(){ fileToPriority[fastaFileNames[s]] = priority; //default fileGroup[fastaFileNames[s]] = "noGroup"; SequenceParser* parser = NULL; - int totalSeqs = 0; int totalChimeras = 0; + lines.clear(); - if ((templatefile == "self") && (groupFile == "")) { - fileGroup.clear(); - fileToPriority.clear(); - if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } - string nameFile = ""; - if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one - nameFile = nameFileNames[s]; - }else { nameFile = getNamesFile(fastaFileNames[s]); } - - //sort fastafile by abundance, returns new sorted fastafile name - m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); - priority = sortFastaFile(fastaFileNames[s], nameFile); - m->mothurOut("Done."); m->mothurOutEndLine(); - - fileToPriority.clear(); - fileGroup.clear(); - fileToPriority[fastaFileNames[s]] = priority; - fileGroup[fastaFileNames[s]] = "noGroup"; - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - }else if ((templatefile == "self") && (groupFile != "")) { - fileGroup.clear(); - fileToPriority.clear(); - if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } - string nameFile = ""; - if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one - nameFile = nameFileNames[s]; - }else { nameFile = getNamesFile(fastaFileNames[s]); } - - //Parse sequences by group - parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile); - vector groups = parser->getNamesOfGroups(); - - for (int i = 0; i < groups.size(); i++) { - vector thisGroupsSeqs = parser->getSeqs(groups[i]); - map thisGroupsMap = parser->getNameMap(groups[i]); - string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta"; - priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); - fileToPriority[newFastaFile] = priority; - fileGroup[newFastaFile] = groups[i]; - } - } - - if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera"; - string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos"; - string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta"; + if (templatefile == "self") { setUpForSelfReference(parser, fileGroup, fileToPriority, s); } - //clears files - ofstream out, out1, out2; - m->openOutputFile(outputFileName, out); out.close(); - m->openOutputFile(accnosFileName, out1); out1.close(); - if (trim) { m->openOutputFile(trimFastaFileName, out2); out2.close(); } - outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); - outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); - if (trim) { outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); } - - - for (itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { - + if (m->control_pressed) { if (parser != NULL) { delete parser; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + if (fileToPriority.size() == 1) { //you running without a groupfile + itFile = fileToPriority.begin(); string thisFastaName = itFile->first; map thisPriority = itFile->second; - - //this is true when you have parsed by groups - if (fileToPriority.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); } - - string thisoutputFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera"; - string thisaccnosFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos"; - string thistrimFastaFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta"; - - //create chimera here if you are mac or linux because fork will copy for you. Create in create processes instead if you are windows. - if (processors == 1) { templateSeqsLength = setupChimera(thisFastaName, thisPriority); } +#ifdef USE_MPI + MPIExecute(thisFastaName, outputFileName, accnosFileName, trimFastaFileName, thisPriority); +#else + //break up file + vector positions; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + positions = m->divideFile(thisFastaName, processors); + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } +#else + if (processors == 1) { lines.push_back(linePair(0, 1000)); } else { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI - templateSeqsLength = setupChimera(thisFastaName, thisPriority); - #endif - } - - if (m->control_pressed) { if (parser != NULL) { delete parser; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - - #ifdef USE_MPI - MPIExecute(thisFastaName, thisoutputFileName, thisaccnosFileName, thistrimFastaFileName); - if (m->control_pressed) { outputTypes.clear(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - #else - //break up file - vector positions; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - positions = m->divideFile(thisFastaName, processors); - for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } - #else - if (processors == 1) { lines.push_back(new linePair(0, 1000)); } - else { - positions = m->setFilePosFasta(thisFastaName, numSeqs); + positions = m->setFilePosFasta(thisFastaName, numSeqs); - //figure out how many sequences you have to process - int numSeqsPerProcessor = numSeqs / processors; - for (int i = 0; i < processors; i++) { - int startIndex = i * numSeqsPerProcessor; - if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } - lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); - } + //figure out how many sequences you have to process + int numSeqsPerProcessor = numSeqs / processors; + for (int i = 0; i < processors; i++) { + int startIndex = i * numSeqsPerProcessor; + if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } + lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } - #endif - - if(processors == 1){ - numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName); - - int numNoParents = chimera->getNumNoParents(); - if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } - - }else{ numSeqs = createProcesses(thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName); } - - if (m->control_pressed) { if (parser != NULL) { delete parser; } outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } - - #endif - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI - delete chimera; - #endif + } +#endif + if(processors == 1){ numSeqs = driver(lines[0], outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority); } + else{ numSeqs = createProcesses(outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority); } - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + if (m->control_pressed) { if (parser != NULL) { delete parser; } outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } +#endif + }else { //you have provided a groupfile +#ifdef USE_MPI + MPIExecuteGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); +#else + if (processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); } + else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); } //destroys fileToPriority +#endif + +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - //append files - m->appendFiles(thisoutputFileName, outputFileName); m->mothurRemove(thisoutputFileName); - totalChimeras = m->appendFiles(thisaccnosFileName, accnosFileName); m->mothurRemove(thisaccnosFileName); - if (trim) { m->appendFiles(thistrimFastaFileName, trimFastaFileName); m->mothurRemove(thistrimFastaFileName); } + if (pid == 0) { +#endif - totalSeqs += numSeqs; + totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName); +#ifdef USE_MPI + } + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait +#endif } - - if (fileToPriority.size() > 1) { totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName); } - + if (parser != NULL) { delete parser; } - m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences."); m->mothurOutEndLine(); + m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } //set accnos file as new current accnosfile @@ -736,7 +671,117 @@ int ChimeraSlayerCommand::execute(){ } } //********************************************************************************************************************** -int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, string accnosFileName, string trimFastaFileName){ +int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosFileName, string trimFastaFileName, map >& fileToPriority, map& fileGroup){ + try { +#ifdef USE_MPI + int pid; + int tag = 2001; + + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); + + //put filenames in a vector, then pass each process a starting and ending point in the vector + //all processes already have the fileToPriority and fileGroup, they just need to know which files to process + map >::iterator itFile; + vector filenames; + for(itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { filenames.push_back(itFile->first); } + + int numGroupsPerProcessor = filenames.size() / processors; + int startIndex = pid * numGroupsPerProcessor; + int endIndex = (pid+1) * numGroupsPerProcessor; + if(pid == (processors - 1)){ endIndex = filenames.size(); } + + vector MPIPos; + + MPI_File outMPI; + MPI_File outMPIAccnos; + MPI_File outMPIFasta; + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + int inMode=MPI_MODE_RDONLY; + + char outFilename[1024]; + strcpy(outFilename, outputFileName.c_str()); + + char outAccnosFilename[1024]; + strcpy(outAccnosFilename, accnosFileName.c_str()); + + char outFastaFilename[1024]; + strcpy(outFastaFilename, trimFastaFileName.c_str()); + + MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); + MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); } + + if (m->control_pressed) { MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; } + + //print headers + if (pid == 0) { //you are the root process + m->mothurOutEndLine(); + m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results."); + m->mothurOutEndLine(); + + string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; + + //print header + int length = outTemp.length(); + char* buf2 = new char[length]; + memcpy(buf2, outTemp.c_str(), length); + + MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status); + delete buf2; + } + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait + + for (int i = startIndex; i < endIndex; i++) { + + int start = time(NULL); + int num = 0; + string thisFastaName = filenames[i]; + map thisPriority = fileToPriority[thisFastaName]; + + char inFileName[1024]; + strcpy(inFileName, thisFastaName.c_str()); + MPI_File inMPI; + MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + + MPIPos = m->setFilePosFasta(thisFastaName, num); //fills MPIPos, returns numSeqs + + cout << endl << "Checking sequences from group: " << fileGroup[thisFastaName] << "." << endl; + + driverMPI(0, num, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, thisFastaName, thisPriority, true); + numSeqs += num; + + MPI_File_close(&inMPI); + m->mothurRemove(thisFastaName); + + cout << endl << "It took " << toString(time(NULL) - start) << " secs to check " + toString(num) + " sequences from group " << fileGroup[thisFastaName] << "." << endl; + } + + if (pid == 0) { + for(int i = 1; i < processors; i++) { + int temp = 0; + MPI_Recv(&temp, 1, MPI_INT, i, 2001, MPI_COMM_WORLD, &status); + numSeqs += temp; + } + }else{ MPI_Send(&numSeqs, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); } + + MPI_File_close(&outMPI); + MPI_File_close(&outMPIAccnos); + if (trim) { MPI_File_close(&outMPIFasta); } + + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait +#endif + return 0; + + }catch(exception& e) { + m->errorOut(e, "ChimeraSlayerCommand", "MPIExecuteGroups"); + exit(1); + } +} +//********************************************************************************************************************** +int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, string accnosFileName, string trimFastaFileName, map& priority){ try { #ifdef USE_MPI @@ -773,7 +818,7 @@ int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, st MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); } - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); delete chimera; return 0; } + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; } if (pid == 0) { //you are the root process m->mothurOutEndLine(); @@ -810,19 +855,9 @@ int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, st } //do your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos); - - int numNoParents = chimera->getNumNoParents(); - int temp; - for(int i = 1; i < processors; i++) { - MPI_Recv(&temp, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &status); - numNoParents += temp; - } - - - if (numSeqs == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); delete chimera; return 0; } + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; } }else{ //you are a child process if (templatefile != "self") { //if template=self we can only use 1 processor @@ -836,12 +871,9 @@ int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, st if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } //do your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos); - - int numNoParents = chimera->getNumNoParents(); - MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false); - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); delete chimera; return 0; } + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; } } } @@ -855,7 +887,7 @@ int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, st #endif - return 0; + return numSeqs; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "MPIExecute"); @@ -953,9 +985,10 @@ int ChimeraSlayerCommand::deconvoluteResults(SequenceParser* parser, string outp itChimeras = chimerasInFile.find(itUnique->second); if (itChimeras == chimerasInFile.end()) { + //is this sequence not already in the file itNames = namesInFile.find((itUnique->second)); - if (itNames == namesInFile.end()) {cout << itUnique->second << endl; out << itUnique->second << '\t' << "no" << endl; namesInFile.insert(itUnique->second); } + if (itNames == namesInFile.end()) { out << itUnique->second << '\t' << "no" << endl; namesInFile.insert(itUnique->second); } } } }else { //read the rest of the line @@ -980,6 +1013,7 @@ int ChimeraSlayerCommand::deconvoluteResults(SequenceParser* parser, string outp //then you really are a no so print, otherwise skip if (itChimeras == chimerasInFile.end()) { print = true; } + }else{ print = true; } } } @@ -1052,29 +1086,57 @@ int ChimeraSlayerCommand::deconvoluteResults(SequenceParser* parser, string outp m->errorOut(e, "ChimeraSlayerCommand", "deconvoluteResults"); exit(1); } -} +} //********************************************************************************************************************** -int ChimeraSlayerCommand::setupChimera(string inputFile, map& priority){ +int ChimeraSlayerCommand::setUpForSelfReference(SequenceParser*& parser, map& fileGroup, map >& fileToPriority, int s){ try { - if (templatefile != "self") { //you want to run slayer with a reference template - chimera = new ChimeraSlayer(inputFile, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + fileGroup.clear(); + fileToPriority.clear(); + + string nameFile = ""; + if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one + nameFile = nameFileNames[s]; + }else { nameFile = getNamesFile(fastaFileNames[s]); } + + //you provided a groupfile + string groupFile = ""; + if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; } + + if (groupFile == "") { + if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } + + //sort fastafile by abundance, returns new sorted fastafile name + m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); + priority = sortFastaFile(fastaFileNames[s], nameFile); + m->mothurOut("Done."); m->mothurOutEndLine(); + + fileToPriority[fastaFileNames[s]] = priority; + fileGroup[fastaFileNames[s]] = "noGroup"; }else { - chimera = new ChimeraSlayer(inputFile, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + //Parse sequences by group + parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile); + vector groups = parser->getNamesOfGroups(); + + for (int i = 0; i < groups.size(); i++) { + vector thisGroupsSeqs = parser->getSeqs(groups[i]); + map thisGroupsMap = parser->getNameMap(groups[i]); + string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta"; + priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); + fileToPriority[newFastaFile] = priority; + fileGroup[newFastaFile] = groups[i]; + } } - if (m->control_pressed) { delete chimera; return 0; } - if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; } - - return (chimera->getLength()); + return 0; } catch(exception& e) { - m->errorOut(e, "ChimeraSlayerCommand", "setupChimera"); + m->errorOut(e, "ChimeraSlayerCommand", "setUpForSelfReference"); exit(1); } } -//********************************************************************************************************************** +//********************************************************************************************************************** string ChimeraSlayerCommand::getNamesFile(string& inputFile){ try { string nameFile = ""; @@ -1107,8 +1169,196 @@ string ChimeraSlayerCommand::getNamesFile(string& inputFile){ } //********************************************************************************************************************** -int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos, string fasta){ +int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string fasta, map >& fileToPriority, map& fileGroup){ + try { + int totalSeqs = 0; + + for (map >::iterator itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { + + if (m->control_pressed) { return 0; } + + int start = time(NULL); + string thisFastaName = itFile->first; + map thisPriority = itFile->second; + string thisoutputFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera"; + string thisaccnosFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos"; + string thistrimFastaFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta"; + + m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); + + lines.clear(); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int proc = 1; + vector positions = m->divideFile(thisFastaName, proc); + lines.push_back(linePair(positions[0], positions[1])); +#else + lines.push_back(linePair(0, 1000)); +#endif + int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority); + + //append files + m->appendFiles(thisoutputFileName, outputFName); m->mothurRemove(thisoutputFileName); + m->appendFiles(thisaccnosFileName, accnos); m->mothurRemove(thisaccnosFileName); + if (trim) { m->appendFiles(thistrimFastaFileName, fasta); m->mothurRemove(thistrimFastaFileName); } + m->mothurRemove(thisFastaName); + + totalSeqs += numSeqs; + + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); + } + + return totalSeqs; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayerCommand", "driverGroups"); + exit(1); + } +} +/**************************************************************************************************/ +int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accnos, string fasta, map >& fileToPriority, map& fileGroup) { try { + int process = 1; + int num = 0; + processIDS.clear(); + + if (fileToPriority.size() < processors) { processors = fileToPriority.size(); } + + int groupsPerProcessor = fileToPriority.size() / processors; + int remainder = fileToPriority.size() % processors; + + vector< map > > breakUp; + + for (int i = 0; i < processors; i++) { + map > thisFileToPriority; + map >::iterator itFile; + int count = 0; + int enough = groupsPerProcessor; + if (i == 0) { enough = groupsPerProcessor + remainder; } + + for (itFile = fileToPriority.begin(); itFile != fileToPriority.end();) { + thisFileToPriority[itFile->first] = itFile->second; + fileToPriority.erase(itFile++); + count++; + if (count == enough) { break; } + } + breakUp.push_back(thisFileToPriority); + } + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup); + + //pass numSeqs to parent + ofstream out; + string tempFile = outputFName + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out.close(); + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup); + + //force parent to wait until all the processes are done + for (int i=0;iopenInputFile(tempFile, in); + if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } + in.close(); m->mothurRemove(tempFile); + } +#else + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the slayerData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for(int i=1; icount; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + + //append output files + for(int i=0;iappendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName); + m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp")); + + m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos); + m->mothurRemove((accnos + toString(processIDS[i]) + ".temp")); + + if (trim) { + m->appendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta); + m->mothurRemove((fasta + toString(processIDS[i]) + ".temp")); + } + } + + + return num; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayerCommand", "createProcessesGroups"); + exit(1); + } +} +//********************************************************************************************************************** + +int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string filename, string accnos, string fasta, map& priority){ + try { + + Chimera* chimera; + if (templatefile != "self") { //you want to run slayer with a reference template + chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + }else { + chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + } + + if (m->control_pressed) { delete chimera; return 0; } + + if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; } + templateSeqsLength = chimera->getLength(); + ofstream out; m->openOutputFile(outputFName, out); @@ -1121,16 +1371,16 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f ifstream inFASTA; m->openInputFile(filename, inFASTA); - inFASTA.seekg(filePos->start); + inFASTA.seekg(filePos.start); - if (filePos->start == 0) { chimera->printHeader(out); } + if (filePos.start == 0) { chimera->printHeader(out); } bool done = false; int count = 0; while (!done) { - if (m->control_pressed) { out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; } + if (m->control_pressed) { delete chimera; out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; } Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA); string candidateAligned = candidateSeq->getAligned(); @@ -1142,7 +1392,7 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f //find chimeras chimera->getChimeras(candidateSeq); - if (m->control_pressed) { delete candidateSeq; return 1; } + if (m->control_pressed) { delete chimera; delete candidateSeq; return 1; } //if you are not chimeric, then check each half data_results wholeResults = chimera->getResults(); @@ -1199,7 +1449,7 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) unsigned long long pos = inFASTA.tellg(); - if ((pos == -1) || (pos >= filePos->end)) { break; } + if ((pos == -1) || (pos >= filePos.end)) { break; } #else if (inFASTA.eof()) { break; } #endif @@ -1211,12 +1461,18 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f //report progress if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); } + int numNoParents = chimera->getNumNoParents(); + if (numNoParents == count) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } + out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); + delete chimera; return count; + + } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "driver"); @@ -1225,15 +1481,27 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f } //********************************************************************************************************************** #ifdef USE_MPI -int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector& MPIPos){ - try { +int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector& MPIPos, string filename, map& priority, bool byGroup){ + try { MPI_Status status; int pid; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + Chimera* chimera; + if (templatefile != "self") { //you want to run slayer with a reference template + chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + }else { + chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand(), byGroup); + } + + if (m->control_pressed) { delete chimera; return 0; } + + if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; } + templateSeqsLength = chimera->getLength(); + for(int i=0;icontrol_pressed) { return 1; } + if (m->control_pressed) { delete chimera; return 1; } //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; @@ -1259,7 +1527,7 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil //find chimeras chimera->getChimeras(candidateSeq); - if (m->control_pressed) { delete candidateSeq; return 1; } + if (m->control_pressed) { delete chimera; delete candidateSeq; return 1; } //if you are not chimeric, then check each half data_results wholeResults = chimera->getResults(); @@ -1339,7 +1607,10 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil //report progress if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); } - + int numNoParents = chimera->getNumNoParents(); + if (numNoParents == num) { cout << "[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors." << endl; } + + delete chimera; return 0; } catch(exception& e) { @@ -1351,11 +1622,10 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil /**************************************************************************************************/ -int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta) { +int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta, map& thisPriority) { try { int process = 0; int num = 0; - int numNoParents = 0; processIDS.clear(); #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) @@ -1367,13 +1637,13 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp"); + num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", thisPriority); //pass numSeqs to parent ofstream out; string tempFile = outputFileName + toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); - out << num << '\t' << chimera->getNumNoParents() << endl; + out << num << endl; out.close(); exit(0); }else { @@ -1393,7 +1663,7 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename ifstream in; string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp"; m->openInputFile(tempFile, in); - if (!in.eof()) { int tempNum = 0; int tempNumParents = 0; in >> tempNum >> tempNumParents; num += tempNum; numNoParents += tempNumParents; } + if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } in.close(); m->mothurRemove(tempFile); } #else @@ -1410,7 +1680,7 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename //Create processor worker threads. for( int i=0; istart, lines[i]->end, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i); + slayerData* tempslayer = new slayerData((outputFileName + extension), (fasta + extension), (accnos + extension), filename, templatefile, search, blastlocation, trimera, trim, realign, m, lines[i].start, lines[i].end, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i); pDataArray.push_back(tempslayer); processIDS.push_back(i); @@ -1425,12 +1695,10 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ num += pDataArray[i]->count; - numNoParents += pDataArray[i]->numNoParents; CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif - if (num == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); rename((accnos + toString(processIDS[0]) + ".temp").c_str(), accnos.c_str()); diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index 54b8fa1..6f3455d 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -43,29 +43,32 @@ private: }; vector processIDS; //processid - vector lines; + vector lines; - int driver(linePair*, string, string, string, string); - int createProcesses(string, string, string, string); + int driver(linePair, string, string, string, string, map&); + int createProcesses(string, string, string, string, map&); int divideInHalf(Sequence, string&, string&); map sortFastaFile(string, string); map sortFastaFile(vector&, map&, string newFile); string getNamesFile(string&); - int setupChimera(string, map&); - int MPIExecute(string, string, string, string); + //int setupChimera(string,); + int MPIExecute(string, string, string, string, map&); int deconvoluteResults(SequenceParser*, string, string, string); map priority; + int setUpForSelfReference(SequenceParser*&, map&, map >&, int); + int driverGroups(string, string, string, map >&, map&); + int createProcessesGroups(string, string, string, map >&, map&); + int MPIExecuteGroups(string, string, string, map >&, map&); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&, string, map&, bool); #endif bool abort, realign, trim, trimera, save; string fastafile, groupfile, templatefile, outputDir, search, namefile, blastlocation; int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength; float divR; - Chimera* chimera; vector outputNames; vector fastaFileNames; @@ -98,6 +101,8 @@ struct slayerData { int count; int numNoParents; int threadId; + map > fileToPriority; + map fileGroup; slayerData(){} slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long long st, unsigned long long en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map prior, int tid) { @@ -132,6 +137,38 @@ struct slayerData { count = 0; numNoParents = 0; } + slayerData(string o, string fa, string ac, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, map >& fPriority, map& fileG, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map prior, int tid) { + outputFName = o; + fasta = fa; + accnos = ac; + templatefile = te; + search = se; + blastlocation = bl; + trimera = tri; + trim = trm; + realign = re; + m = mout; + fileGroup = fileG; + fileToPriority = fPriority; + ksize = ks; + match = ma; + mismatch = mis; + window = win; + minSimilarity = minS; + minCoverage = minC; + minBS = miBS; + minSNP = minSN; + parents = par; + iters = it; + increment = inc; + numwanted = numw; + divR = div; + priority = prior; + threadId = tid; + count = 0; + numNoParents = 0; + } + }; /**************************************************************************************************/ @@ -287,6 +324,8 @@ static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){ if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); } pDataArray->numNoParents = chimera->getNumNoParents(); + if (pDataArray->numNoParents == count) { pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); } + out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } @@ -301,6 +340,199 @@ static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){ exit(1); } } + +/**************************************************************************************************/ + +static DWORD WINAPI MySlayerGroupThreadFunction(LPVOID lpParam){ + slayerData* pDataArray; + pDataArray = (slayerData*)lpParam; + + try { + + int totalSeqs = 0; + + for (map >::iterator itFile = pDataArray->fileToPriority.begin(); itFile != pDataArray->fileToPriority.end(); itFile++) { + + if (pDataArray->m->control_pressed) { return 0; } + + int start = time(NULL); + string thisFastaName = itFile->first; + map thisPriority = itFile->second; + string thisoutputFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.chimera"; + string thisaccnosFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.accnos"; + string thistrimFastaFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.fasta"; + + pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group: " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine(); + + //int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + ofstream out; + pDataArray->m->openOutputFile(thisoutputFileName, out); + + ofstream out2; + pDataArray->m->openOutputFile(thisaccnosFileName, out2); + + ofstream out3; + if (pDataArray->trim) { pDataArray->m->openOutputFile(thistrimFastaFileName, out3); } + + ifstream inFASTA; + pDataArray->m->openInputFile(thisFastaName, inFASTA); + + Chimera* chimera; + chimera = new ChimeraSlayer(thisFastaName, pDataArray->templatefile, pDataArray->trim, thisPriority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId); + chimera->printHeader(out); + + int numSeqs = 0; + + if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 0; } + + if (chimera->getUnaligned()) { + pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine(); + out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); + delete chimera; + return 0; + } + int templateSeqsLength = chimera->getLength(); + + bool done = false; + while (!done) { + + if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1; } + + Sequence* candidateSeq = new Sequence(inFASTA); pDataArray->m->gobble(inFASTA); + string candidateAligned = candidateSeq->getAligned(); + + if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file + if (candidateSeq->getAligned().length() != templateSeqsLength) { + pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine(); + }else{ + //find chimeras + chimera->getChimeras(candidateSeq); + + if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete candidateSeq; delete chimera; return 1; } + + //if you are not chimeric, then check each half + data_results wholeResults = chimera->getResults(); + + //determine if we need to split + bool isChimeric = false; + + if (wholeResults.flag == "yes") { + string chimeraFlag = "no"; + if( (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR) + || + (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; } + + + if (chimeraFlag == "yes") { + if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; } + } + } + + if ((!isChimeric) && pDataArray->trimera) { + + //split sequence in half by bases + string leftQuery, rightQuery; + Sequence tempSeq(candidateSeq->getName(), candidateAligned); + //divideInHalf(tempSeq, leftQuery, rightQuery); + string queryUnAligned = tempSeq.getUnaligned(); + int numBases = int(queryUnAligned.length() * 0.5); + + string queryAligned = tempSeq.getAligned(); + leftQuery = tempSeq.getAligned(); + rightQuery = tempSeq.getAligned(); + + int baseCount = 0; + int leftSpot = 0; + for (int i = 0; i < queryAligned.length(); i++) { + //if you are a base + if (isalpha(queryAligned[i])) { + baseCount++; + } + + //if you have half + if (baseCount >= numBases) { leftSpot = i; break; } //first half + } + + //blank out right side + for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; } + + //blank out left side + for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; } + + //run chimeraSlayer on each piece + Sequence* left = new Sequence(candidateSeq->getName(), leftQuery); + Sequence* right = new Sequence(candidateSeq->getName(), rightQuery); + + //find chimeras + chimera->getChimeras(left); + data_results leftResults = chimera->getResults(); + + chimera->getChimeras(right); + data_results rightResults = chimera->getResults(); + + //if either piece is chimeric then report + Sequence trimmed = chimera->print(out, out2, leftResults, rightResults); + if (pDataArray->trim) { trimmed.printSequence(out3); } + + delete left; delete right; + + }else { //already chimeric + //print results + Sequence trimmed = chimera->print(out, out2); + if (pDataArray->trim) { trimmed.printSequence(out3); } + } + + + } + numSeqs++; + } + + delete candidateSeq; + + if (inFASTA.eof()) { break; } + + //report progress + if((numSeqs) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(numSeqs)); pDataArray->m->mothurOutEndLine(); } + } + //report progress + if((numSeqs) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(numSeqs)); pDataArray->m->mothurOutEndLine(); } + + pDataArray->numNoParents = chimera->getNumNoParents(); + if (pDataArray->numNoParents == numSeqs) { pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); } + + out.close(); + out2.close(); + if (pDataArray->trim) { out3.close(); } + inFASTA.close(); + delete chimera; + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + //append files + pDataArray->m->appendFiles(thisoutputFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(thisoutputFileName); + pDataArray->m->appendFiles(thisaccnosFileName, pDataArray->accnos); pDataArray->m->mothurRemove(thisaccnosFileName); + if (pDataArray->trim) { pDataArray->m->appendFiles(thistrimFastaFileName, pDataArray->fasta); pDataArray->m->mothurRemove(thistrimFastaFileName); } + pDataArray->m->mothurRemove(thisFastaName); + + totalSeqs += numSeqs; + + pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine(); + } + + pDataArray->count = totalSeqs; + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerGroupThreadFunction"); + exit(1); + } +} + #endif /**************************************************************************************************/ diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 54b1d9b..0846bbd 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -509,7 +509,7 @@ int ChimeraUchimeCommand::execute(){ if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one nameFile = nameFileNames[s]; }else { nameFile = getNamesFile(fastaFileNames[s]); } - + map seqs; readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index e9b3077..7505ae6 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -564,14 +564,14 @@ int ClusterSplitCommand::execute(){ listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files }else{ - cout << processors << '\t' << distName.size() << endl; + //cout << processors << '\t' << distName.size() << endl; vector < vector < map > > dividedNames; //distNames[1] = vector of filenames for process 1... dividedNames.resize(processors); //for each file group figure out which process will complete it //want to divide the load intelligently so the big files are spread between processes for (int i = 0; i < distName.size(); i++) { - cout << i << endl; + //cout << i << endl; int processToAssign = (i+1) % processors; if (processToAssign == 0) { processToAssign = processors; } @@ -580,7 +580,7 @@ int ClusterSplitCommand::execute(){ //not lets reverse the order of ever other process, so we balance big files running with little ones for (int i = 0; i < processors; i++) { - cout << i << endl; + //cout << i << endl; int remainder = ((i+1) % processors); if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); } } diff --git a/commandfactory.cpp b/commandfactory.cpp index 364c5ed..b49df72 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -203,7 +203,7 @@ CommandFactory::CommandFactory(){ commands["pre.cluster"] = "pre.cluster"; commands["pcoa"] = "pcoa"; commands["otu.hierarchy"] = "otu.hierarchy"; - commands["set.dir"] = "set.dir"; + commands["set.dir"] = "MPIEnabled"; commands["merge.files"] = "merge.files"; commands["parse.list"] = "parse.list"; commands["set.logfile"] = "set.logfile"; @@ -243,8 +243,8 @@ CommandFactory::CommandFactory(){ commands["anosim"] = "anosim"; commands["make.fastq"] = "make.fastq"; commands["merge.groups"] = "merge.groups"; - commands["get.current"] = "get.current"; - commands["set.current"] = "set.current"; + commands["get.current"] = "MPIEnabled"; + commands["set.current"] = "MPIEnabled"; commands["get.commandinfo"] = "get.commandinfo"; commands["deunique.tree"] = "deunique.tree"; commands["count.seqs"] = "count.seqs"; diff --git a/heatmapsim.cpp b/heatmapsim.cpp index 18043c9..62965f9 100644 --- a/heatmapsim.cpp +++ b/heatmapsim.cpp @@ -20,7 +20,7 @@ #include "sharedbraycurtis.h" //********************************************************************************************************************** -HeatMapSim::HeatMapSim(string dir, string i) : outputDir(dir), inputfile(i) { +HeatMapSim::HeatMapSim(string dir, string i, int f) : outputDir(dir), inputfile(i), fontSize(f) { m = MothurOut::getInstance(); } //********************************************************************************************************************** @@ -35,7 +35,7 @@ vector HeatMapSim::getPic(vector lookup, vectorcontrol_pressed) { return outputNames; } - string filenamesvg = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + calcs[k]->getName() + ".heatmap.sim.svg"; + string filenamesvg = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + calcs[k]->getName() + ".heatmap.sim.svg"; m->openOutputFile(filenamesvg, outsvg); outputNames.push_back(filenamesvg); @@ -45,12 +45,12 @@ vector HeatMapSim::getPic(vector lookup, vector"; - outsvg << "Heatmap at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Heatmap at distance " + lookup[0]->getLabel() + "\n"; //column labels for (int h = 0; h < lookup.size(); h++) { - outsvg << "getGroup().length() / 2)) + "\" y=\"50\">" + lookup[h]->getGroup() + "\n"; - outsvg << "getGroup().length() / 2)) + "\" x=\"50\">" + lookup[h]->getGroup() + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"50\">" + lookup[h]->getGroup() + "\n"; + outsvg << "getGroup().length() / 2)) + "\" x=\"50\">" + lookup[h]->getGroup() + "\n"; } sims.clear(); @@ -122,12 +122,12 @@ string HeatMapSim::getPic(vector< vector > dists, vector groups) //white backround outsvg << ""; - outsvg << "Heatmap for " + inputfile + "\n"; + outsvg << "Heatmap for " + inputfile + "\n"; //column labels for (int h = 0; h < groups.size(); h++) { - outsvg << "" + groups[h] + "\n"; - outsvg << "" + groups[h] + "\n"; + outsvg << "" + groups[h] + "\n"; + outsvg << "" + groups[h] + "\n"; } double biggest = -1; @@ -206,7 +206,7 @@ void HeatMapSim::printLegend(int y, float maxSim) { label /= 1000.0; string text = toString(label, 1); - outsvg << "" + text + "\n"; + outsvg << "" + text + "\n"; x += 153; } } diff --git a/heatmapsim.h b/heatmapsim.h index a28995d..aa23c18 100644 --- a/heatmapsim.h +++ b/heatmapsim.h @@ -19,7 +19,7 @@ class HeatMapSim { public: - HeatMapSim(string, string); + HeatMapSim(string, string, int); ~HeatMapSim(){}; vector getPic(vector, vector); @@ -29,6 +29,7 @@ class HeatMapSim { void printLegend(int, float); string format, groupComb, outputDir, inputfile; + int fontSize; ofstream outsvg; MothurOut* m; diff --git a/heatmapsimcommand.cpp b/heatmapsimcommand.cpp index 1b6fc5b..d78b3cf 100644 --- a/heatmapsimcommand.cpp +++ b/heatmapsimcommand.cpp @@ -30,6 +30,7 @@ vector HeatMapSimCommand::setParameters(){ CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); CommandParameter pcalc("calc", "Multiple", "jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-morisitahorn-braycurtis", "jest-thetayc", "", "", "",true,false); parameters.push_back(pcalc); + CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "",false,false); parameters.push_back(pfontsize); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -47,11 +48,12 @@ string HeatMapSimCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; - helpString += "The heatmap.sim command parameters are shared, phylip, column, name, groups, calc and label. shared or phylip or column and name are required unless valid current files exist.\n"; + helpString += "The heatmap.sim command parameters are shared, phylip, column, name, groups, calc, fontsize and label. shared or phylip or column and name are required unless valid current files exist.\n"; helpString += "There are two ways to use the heatmap.sim command. The first is with the read.otu command. \n"; helpString += "With the read.otu command you may use the groups, label and calc parameters. \n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n"; helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n"; + helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n"; helpString += "The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, label=yourLabels).\n"; helpString += "Example heatmap.sim(groups=A-B-C, calc=jabund).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; @@ -113,9 +115,7 @@ HeatMapSimCommand::HeatMapSimCommand(string option) { outputTypes["svg"] = tempOutNames; format = ""; - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } - + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -208,6 +208,10 @@ HeatMapSimCommand::HeatMapSimCommand(string option) { } } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); } + //check for optional parameter and set defaults // ...at some point should added some additional type checking... @@ -237,6 +241,8 @@ HeatMapSimCommand::HeatMapSimCommand(string option) { m->setGroups(Groups); } + string temp = validParameter.validFile(parameters, "fontsize", false); if (temp == "not found") { temp = "24"; } + convert(temp, fontsize); if (abort == false) { ValidCalculators validCalculator; @@ -286,7 +292,7 @@ int HeatMapSimCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - heatmap = new HeatMapSim(outputDir, inputfile); + heatmap = new HeatMapSim(outputDir, inputfile, fontsize); if (format == "shared") { runCommandShared(); diff --git a/heatmapsimcommand.h b/heatmapsimcommand.h index dd6fa2c..74e20be 100644 --- a/heatmapsimcommand.h +++ b/heatmapsimcommand.h @@ -44,6 +44,7 @@ private: set labels; //holds labels to be used string format, groups, label, calc, sharedfile, phylipfile, columnfile, namefile, outputDir, inputfile; vector Estimators, Groups, outputNames; + int fontsize; int runCommandShared(); int runCommandDist(); diff --git a/listseqscommand.cpp b/listseqscommand.cpp index 833ac95..afbacc0 100644 --- a/listseqscommand.cpp +++ b/listseqscommand.cpp @@ -400,14 +400,14 @@ int ListSeqsCommand::readAlign(){ if (!in.eof()) { in >> junk; } else { break; } } - + //m->getline(in); while(!in.eof()){ if (m->control_pressed) { in.close(); return 0; } in >> name; //read from first column - + //m->getline(in); //read rest for (int i = 0; i < 15; i++) { if (!in.eof()) { in >> junk; } diff --git a/mothurout.cpp b/mothurout.cpp index 32f3b85..bc5e24e 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1233,7 +1233,7 @@ vector MothurOut::divideFile(string filename, int& proc) { //save end pos filePos.push_back(size); - + //sanity check filePos for (int i = 0; i < (filePos.size()-1); i++) { if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } @@ -1469,10 +1469,17 @@ int MothurOut::getNumNames(string names){ } /***********************************************************************/ -void MothurOut::mothurRemove(string filename){ +int MothurOut::mothurRemove(string filename){ try { filename = getFullPathName(filename); - remove(filename.c_str()); + int error = remove(filename.c_str()); + //if (error != 0) { + // if (errno != ENOENT) { //ENOENT == file does not exist + // string message = "Error deleting file " + filename; + // perror(message.c_str()); + // } + //} + return error; } catch(exception& e) { errorOut(e, "MothurOut", "mothurRemove"); diff --git a/mothurout.h b/mothurout.h index 785263b..e614902 100644 --- a/mothurout.h +++ b/mothurout.h @@ -83,7 +83,7 @@ class MothurOut { int readNames(string, map&); int readNames(string, map >&); int readNames(string, vector&, map&); - void mothurRemove(string); + int mothurRemove(string); //searchs and checks diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index cedb74a..9a6998e 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -295,7 +295,7 @@ int ScreenSeqsCommand::execute(){ int numFastaSeqs = 0; set badSeqNames; int start = time(NULL); - + #ifdef USE_MPI int pid, numSeqsPerProcessor; int tag = 2001; @@ -304,7 +304,7 @@ int ScreenSeqsCommand::execute(){ MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); - + MPI_File inMPI; MPI_File outMPIGood; MPI_File outMPIBadAccnos; @@ -667,13 +667,11 @@ int ScreenSeqsCommand::getSummary(vector& positions){ #ifdef USE_MPI + int pid; MPI_Comm_rank(MPI_COMM_WORLD, &pid); - if (pid == 0) { //only one process should fix files + if (pid == 0) { driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]); - } - - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait #else int numSeqs = 0; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) @@ -707,6 +705,33 @@ int ScreenSeqsCommand::getSummary(vector& positions){ else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); } } +#ifdef USE_MPI + } + + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + MPI_Comm_size(MPI_COMM_WORLD, &processors); + + if (pid == 0) { + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&startPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + MPI_Send(&endPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + MPI_Send(&maxAmbig, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + MPI_Send(&maxHomoP, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + MPI_Send(&minLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + MPI_Send(&maxLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + } + }else { + MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + MPI_Recv(&endPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + MPI_Recv(&maxAmbig, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + MPI_Recv(&maxHomoP, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + MPI_Recv(&minLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + MPI_Recv(&maxLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case +#endif return 0; } catch(exception& e) { diff --git a/setdircommand.cpp b/setdircommand.cpp index 0b3379c..17bad60 100644 --- a/setdircommand.cpp +++ b/setdircommand.cpp @@ -101,6 +101,14 @@ int SetDirectoryCommand::execute(){ commandFactory = CommandFactory::getInstance(); + string tag = ""; +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + tag = toString(pid); +#endif + m->mothurOut("Mothur's directories:"); m->mothurOutEndLine(); //redirect output @@ -122,7 +130,7 @@ int SetDirectoryCommand::execute(){ //test to make sure directory exists output = m->getFullPathName(output); - string outTemp = output + "temp"; + string outTemp = output + tag + "temp"; ofstream out; out.open(outTemp.c_str(), ios::trunc); if(!out) { @@ -154,7 +162,7 @@ int SetDirectoryCommand::execute(){ //test to make sure directory exists input = m->getFullPathName(input); - string inTemp = input + "temp"; + string inTemp = input + tag + "temp"; ofstream in; in.open(inTemp.c_str(), ios::trunc); if(!in) {