MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status);
char* buf = new char[length];
- MPI_Recv(&buf, length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status);
+ MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status);
string temp = buf;
if (temp.length() > length) { temp = temp.substr(0, length); }
//played with this a bit, but it may be better to try user-defined datatypes with set string lengths??
vector<string> MPIBestSend = getBestWindow(lines[0]);
pref.clear();
-
+
//send your result to parent
for (int i = 0; i < numSeqs; i++) {
//for each label the user selected
while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
-
+
if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
//create catchall input file from mothur's inputfile
string filename = process(sabund, inputFileNames[p]);
string outputPath = m->getPathName(filename);
-
+
//create system command
string catchAllCommand = "";
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
//create catchall input file from mothur's inputfile
string filename = process(sabund, inputFileNames[p]);
string outputPath = m->getPathName(filename);
-
+
//create system command
string catchAllCommand = "";
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
- #ifdef USE_MPI
+ #ifdef USE_MPI
int pid, processors;
vector<unsigned long long> positions;
int numSeqs;
int tag = 2001;
-
- MPI_Status status;
MPI_File inMPI;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
- //char* inFileName = new char[file.length()];
- //memcpy(inFileName, file.c_str(), file.length());
-
- char inFileName[1024];
- strcpy(inFileName, file.c_str());
-
- MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
- //delete inFileName;
-
- if (pid == 0) {
+ MPI_Status status;
+
+ if (byGroup) {
+ char inFileName[1024];
+ strcpy(inFileName, file.c_str());
+
+ MPI_File_open(MPI_COMM_SELF, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+
positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
-
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
+
+ //read next sequence
+ int seqlength = positions[i+1] - positions[i];
+ char* buf4 = new char[seqlength];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+
+ string tempBuf = buf4;
+ if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+
+ Sequence* current = new Sequence(iss);
+ if (current->getName() != "") {
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { unaligned = true; }
+
+ container.push_back(current);
+ if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
+ }
}
- }else{
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- positions.resize(numSeqs+1);
- MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
- }
-
- //read file
- for(int i=0;i<numSeqs;i++){
-
- if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
-
- //read next sequence
- int seqlength = positions[i+1] - positions[i];
- char* buf4 = new char[seqlength];
-
- MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
- string tempBuf = buf4;
- if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
- delete buf4;
-
- istringstream iss (tempBuf,istringstream::in);
-
- Sequence* current = new Sequence(iss);
- if (current->getName() != "") {
- if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
- else if (length != current->getAligned().length()) { unaligned = true; }
-
- container.push_back(current);
- if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
+ MPI_File_close(&inMPI);
+
+ }else {
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+
+ //char* inFileName = new char[file.length()];
+ //memcpy(inFileName, file.c_str(), file.length());
+
+ char inFileName[1024];
+ strcpy(inFileName, file.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ //delete inFileName;
+
+ if (pid == 0) {
+ positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ }
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
+
+ //read next sequence
+ int seqlength = positions[i+1] - positions[i];
+ char* buf4 = new char[seqlength];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+
+ string tempBuf = buf4;
+ if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+
+ Sequence* current = new Sequence(iss);
+ if (current->getName() != "") {
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { unaligned = true; }
+
+ container.push_back(current);
+ if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
+ }
}
+
+ MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
}
-
- MPI_File_close(&inMPI);
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
ifstream in;
Sequence* current = new Sequence(in); m->gobble(in);
if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
- else if (length != current->getAligned().length()) { unaligned = true; }
+ else if (length != current->getAligned().length()) { unaligned = true; }
if (current->getName() != "") {
container.push_back(current);
public:
- Chimera(){ m = MothurOut::getInstance(); length = 0; unaligned = false; }
+ Chimera(){ m = MothurOut::getInstance(); length = 0; unaligned = false; byGroup = false; }
virtual ~Chimera(){ for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } for (int i = 0; i < filteredTemplateSeqs.size(); i++) { delete filteredTemplateSeqs[i]; } };
virtual bool getUnaligned() { return unaligned; }
virtual int getLength() { return length; }
vector<Sequence*> templateSeqs;
vector<Sequence*> filteredTemplateSeqs;
- bool filter, unaligned;
+ bool filter, unaligned, byGroup;
int length;
string seqMask, filterString, outputDir, templateFileName;
Sequence* getSequence(string); //find sequence from name
}
}
//***************************************************************************************************************
+//template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
+ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() {
+ try {
+ byGroup = bg;
+ fastafile = file; templateSeqs = readSeqs(fastafile);
+ templateFileName = temp;
+ searchMethod = mode;
+ kmerSize = k;
+ match = ms;
+ misMatch = mms;
+ window = win;
+ divR = div;
+ minSim = minsim;
+ minCov = mincov;
+ minBS = minbs;
+ minSNP = minsnp;
+ parents = par;
+ iters = it;
+ increment = inc;
+ numWanted = numw;
+ realign = r;
+ trimChimera = trim;
+ priority = prior;
+ numNoParents = 0;
+ blastlocation = blas;
+ threadID = tid;
+
+
+ createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+
+ if (searchMethod == "distance") {
+ //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+
+ //run filter on template copying templateSeqs into filteredTemplateSeqs
+ for (int i = 0; i < templateSeqs.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+ runFilter(newSeq);
+ filteredTemplateSeqs.push_back(newSeq);
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
//template=self
ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
blastlocation = blas;
threadID = tid;
+
createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
if (searchMethod == "distance") {
if (chimeraFlag == "yes") {
if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
- m->mothurOut(toString(threadID) +"\t"+ querySeq.getName() + "\tyes"); m->mothurOutEndLine();
+ m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
outAcc << querySeq.getName() << endl;
if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
public:
ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int);
ChimeraSlayer(string, string, bool, map<string, int>&, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int);
+ ChimeraSlayer(string, string, bool, map<string, int>&, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int, bool);
~ChimeraSlayer();
//until we resolve the issue 10-18-11
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
#else
- processors=1;
+ //processors=1;
#endif
}
}
int ChimeraSlayerCommand::execute(){
try{
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
+
for (int s = 0; s < fastaFileNames.size(); s++) {
m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
int start = time(NULL);
+ if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera";
+ string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos";
+ string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta";
- //you provided a groupfile
- string groupFile = "";
- if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
+ //clears files
+ ofstream out, out1, out2;
+ m->openOutputFile(outputFileName, out); out.close();
+ m->openOutputFile(accnosFileName, out1); out1.close();
+ if (trim) { m->openOutputFile(trimFastaFileName, out2); out2.close(); }
+ outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
+ outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
+ if (trim) { outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }
//maps a filename to priority map.
//if no groupfile this is fastafileNames[s] -> prioirity
fileToPriority[fastaFileNames[s]] = priority; //default
fileGroup[fastaFileNames[s]] = "noGroup";
SequenceParser* parser = NULL;
- int totalSeqs = 0;
int totalChimeras = 0;
+ lines.clear();
- if ((templatefile == "self") && (groupFile == "")) {
- fileGroup.clear();
- fileToPriority.clear();
- if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
- string nameFile = "";
- if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
- nameFile = nameFileNames[s];
- }else { nameFile = getNamesFile(fastaFileNames[s]); }
-
- //sort fastafile by abundance, returns new sorted fastafile name
- m->mothurOut("Sorting fastafile according to abundance..."); cout.flush();
- priority = sortFastaFile(fastaFileNames[s], nameFile);
- m->mothurOut("Done."); m->mothurOutEndLine();
-
- fileToPriority.clear();
- fileGroup.clear();
- fileToPriority[fastaFileNames[s]] = priority;
- fileGroup[fastaFileNames[s]] = "noGroup";
- if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
- }else if ((templatefile == "self") && (groupFile != "")) {
- fileGroup.clear();
- fileToPriority.clear();
- if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
- string nameFile = "";
- if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
- nameFile = nameFileNames[s];
- }else { nameFile = getNamesFile(fastaFileNames[s]); }
-
- //Parse sequences by group
- parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
- vector<string> groups = parser->getNamesOfGroups();
-
- for (int i = 0; i < groups.size(); i++) {
- vector<Sequence> thisGroupsSeqs = parser->getSeqs(groups[i]);
- map<string, string> thisGroupsMap = parser->getNameMap(groups[i]);
- string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta";
- priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile);
- fileToPriority[newFastaFile] = priority;
- fileGroup[newFastaFile] = groups[i];
- }
- }
-
- if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera";
- string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos";
- string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta";
+ if (templatefile == "self") { setUpForSelfReference(parser, fileGroup, fileToPriority, s); }
- //clears files
- ofstream out, out1, out2;
- m->openOutputFile(outputFileName, out); out.close();
- m->openOutputFile(accnosFileName, out1); out1.close();
- if (trim) { m->openOutputFile(trimFastaFileName, out2); out2.close(); }
- outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
- outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
- if (trim) { outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }
-
-
- for (itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) {
-
+ if (m->control_pressed) { if (parser != NULL) { delete parser; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ if (fileToPriority.size() == 1) { //you running without a groupfile
+ itFile = fileToPriority.begin();
string thisFastaName = itFile->first;
map<string, int> thisPriority = itFile->second;
-
- //this is true when you have parsed by groups
- if (fileToPriority.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); }
-
- string thisoutputFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera";
- string thisaccnosFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos";
- string thistrimFastaFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta";
-
- //create chimera here if you are mac or linux because fork will copy for you. Create in create processes instead if you are windows.
- if (processors == 1) { templateSeqsLength = setupChimera(thisFastaName, thisPriority); }
+#ifdef USE_MPI
+ MPIExecute(thisFastaName, outputFileName, accnosFileName, trimFastaFileName, thisPriority);
+#else
+ //break up file
+ vector<unsigned long long> positions;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ positions = m->divideFile(thisFastaName, processors);
+ for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
+#else
+ if (processors == 1) { lines.push_back(linePair(0, 1000)); }
else {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI
- templateSeqsLength = setupChimera(thisFastaName, thisPriority);
- #endif
- }
-
- if (m->control_pressed) { if (parser != NULL) { delete parser; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
-
- #ifdef USE_MPI
- MPIExecute(thisFastaName, thisoutputFileName, thisaccnosFileName, thistrimFastaFileName);
- if (m->control_pressed) { outputTypes.clear(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
- #else
- //break up file
- vector<unsigned long long> positions;
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- positions = m->divideFile(thisFastaName, processors);
- for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); }
- #else
- if (processors == 1) { lines.push_back(new linePair(0, 1000)); }
- else {
- positions = m->setFilePosFasta(thisFastaName, numSeqs);
+ positions = m->setFilePosFasta(thisFastaName, numSeqs);
- //figure out how many sequences you have to process
- int numSeqsPerProcessor = numSeqs / processors;
- for (int i = 0; i < processors; i++) {
- int startIndex = i * numSeqsPerProcessor;
- if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; }
- lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
- }
+ //figure out how many sequences you have to process
+ int numSeqsPerProcessor = numSeqs / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numSeqsPerProcessor;
+ if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; }
+ lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
}
- #endif
-
- if(processors == 1){
- numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName);
-
- int numNoParents = chimera->getNumNoParents();
- if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
-
- }else{ numSeqs = createProcesses(thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName); }
-
- if (m->control_pressed) { if (parser != NULL) { delete parser; } outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
-
- #endif
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || USE_MPI
- delete chimera;
- #endif
+ }
+#endif
+ if(processors == 1){ numSeqs = driver(lines[0], outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority); }
+ else{ numSeqs = createProcesses(outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ if (m->control_pressed) { if (parser != NULL) { delete parser; } outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+#endif
+ }else { //you have provided a groupfile
+#ifdef USE_MPI
+ MPIExecuteGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup);
+#else
+ if (processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); }
+ else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); } //destroys fileToPriority
+#endif
+
+#ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- //append files
- m->appendFiles(thisoutputFileName, outputFileName); m->mothurRemove(thisoutputFileName);
- totalChimeras = m->appendFiles(thisaccnosFileName, accnosFileName); m->mothurRemove(thisaccnosFileName);
- if (trim) { m->appendFiles(thistrimFastaFileName, trimFastaFileName); m->mothurRemove(thistrimFastaFileName); }
+ if (pid == 0) {
+#endif
- totalSeqs += numSeqs;
+ totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName);
+#ifdef USE_MPI
+ }
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
+#endif
}
-
- if (fileToPriority.size() > 1) { totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName); }
-
+
if (parser != NULL) { delete parser; }
- m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences."); m->mothurOutEndLine();
+ m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
}
//set accnos file as new current accnosfile
}
}
//**********************************************************************************************************************
-int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, string accnosFileName, string trimFastaFileName){
+int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosFileName, string trimFastaFileName, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup){
+ try {
+#ifdef USE_MPI
+ int pid;
+ int tag = 2001;
+
+ MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+
+ //put filenames in a vector, then pass each process a starting and ending point in the vector
+ //all processes already have the fileToPriority and fileGroup, they just need to know which files to process
+ map<string, map<string, int> >::iterator itFile;
+ vector<string> filenames;
+ for(itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { filenames.push_back(itFile->first); }
+
+ int numGroupsPerProcessor = filenames.size() / processors;
+ int startIndex = pid * numGroupsPerProcessor;
+ int endIndex = (pid+1) * numGroupsPerProcessor;
+ if(pid == (processors - 1)){ endIndex = filenames.size(); }
+
+ vector<unsigned long long> MPIPos;
+
+ MPI_File outMPI;
+ MPI_File outMPIAccnos;
+ MPI_File outMPIFasta;
+
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+ int inMode=MPI_MODE_RDONLY;
+
+ char outFilename[1024];
+ strcpy(outFilename, outputFileName.c_str());
+
+ char outAccnosFilename[1024];
+ strcpy(outAccnosFilename, accnosFileName.c_str());
+
+ char outFastaFilename[1024];
+ strcpy(outFastaFilename, trimFastaFileName.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+ MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+ if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
+
+ if (m->control_pressed) { MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; }
+
+ //print headers
+ if (pid == 0) { //you are the root process
+ m->mothurOutEndLine();
+ m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
+ m->mothurOutEndLine();
+
+ string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
+
+ //print header
+ int length = outTemp.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outTemp.c_str(), length);
+
+ MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
+ delete buf2;
+ }
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
+
+ for (int i = startIndex; i < endIndex; i++) {
+
+ int start = time(NULL);
+ int num = 0;
+ string thisFastaName = filenames[i];
+ map<string, int> thisPriority = fileToPriority[thisFastaName];
+
+ char inFileName[1024];
+ strcpy(inFileName, thisFastaName.c_str());
+ MPI_File inMPI;
+ MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+
+ MPIPos = m->setFilePosFasta(thisFastaName, num); //fills MPIPos, returns numSeqs
+
+ cout << endl << "Checking sequences from group: " << fileGroup[thisFastaName] << "." << endl;
+
+ driverMPI(0, num, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, thisFastaName, thisPriority, true);
+ numSeqs += num;
+
+ MPI_File_close(&inMPI);
+ m->mothurRemove(thisFastaName);
+
+ cout << endl << "It took " << toString(time(NULL) - start) << " secs to check " + toString(num) + " sequences from group " << fileGroup[thisFastaName] << "." << endl;
+ }
+
+ if (pid == 0) {
+ for(int i = 1; i < processors; i++) {
+ int temp = 0;
+ MPI_Recv(&temp, 1, MPI_INT, i, 2001, MPI_COMM_WORLD, &status);
+ numSeqs += temp;
+ }
+ }else{ MPI_Send(&numSeqs, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); }
+
+ MPI_File_close(&outMPI);
+ MPI_File_close(&outMPIAccnos);
+ if (trim) { MPI_File_close(&outMPIFasta); }
+
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
+#endif
+ return 0;
+
+ }catch(exception& e) {
+ m->errorOut(e, "ChimeraSlayerCommand", "MPIExecuteGroups");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, string accnosFileName, string trimFastaFileName, map<string, int>& priority){
try {
#ifdef USE_MPI
MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; }
if (pid == 0) { //you are the root process
m->mothurOutEndLine();
}
//do your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
-
- int numNoParents = chimera->getNumNoParents();
- int temp;
- for(int i = 1; i < processors; i++) {
- MPI_Recv(&temp, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
- numNoParents += temp;
- }
-
-
- if (numSeqs == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
-
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; }
}else{ //you are a child process
if (templatefile != "self") { //if template=self we can only use 1 processor
if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
//do your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
-
- int numNoParents = chimera->getNumNoParents();
- MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; }
}
}
#endif
- return 0;
+ return numSeqs;
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayerCommand", "MPIExecute");
itChimeras = chimerasInFile.find(itUnique->second);
if (itChimeras == chimerasInFile.end()) {
+ //is this sequence not already in the file
itNames = namesInFile.find((itUnique->second));
- if (itNames == namesInFile.end()) {cout << itUnique->second << endl; out << itUnique->second << '\t' << "no" << endl; namesInFile.insert(itUnique->second); }
+ if (itNames == namesInFile.end()) { out << itUnique->second << '\t' << "no" << endl; namesInFile.insert(itUnique->second); }
}
}
}else { //read the rest of the line
//then you really are a no so print, otherwise skip
if (itChimeras == chimerasInFile.end()) { print = true; }
+
}else{ print = true; }
}
}
m->errorOut(e, "ChimeraSlayerCommand", "deconvoluteResults");
exit(1);
}
-}
+}
//**********************************************************************************************************************
-int ChimeraSlayerCommand::setupChimera(string inputFile, map<string, int>& priority){
+int ChimeraSlayerCommand::setUpForSelfReference(SequenceParser*& parser, map<string, string>& fileGroup, map<string, map<string, int> >& fileToPriority, int s){
try {
- if (templatefile != "self") { //you want to run slayer with a reference template
- chimera = new ChimeraSlayer(inputFile, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());
+ fileGroup.clear();
+ fileToPriority.clear();
+
+ string nameFile = "";
+ if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
+ nameFile = nameFileNames[s];
+ }else { nameFile = getNamesFile(fastaFileNames[s]); }
+
+ //you provided a groupfile
+ string groupFile = "";
+ if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
+
+ if (groupFile == "") {
+ if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+
+ //sort fastafile by abundance, returns new sorted fastafile name
+ m->mothurOut("Sorting fastafile according to abundance..."); cout.flush();
+ priority = sortFastaFile(fastaFileNames[s], nameFile);
+ m->mothurOut("Done."); m->mothurOutEndLine();
+
+ fileToPriority[fastaFileNames[s]] = priority;
+ fileGroup[fastaFileNames[s]] = "noGroup";
}else {
- chimera = new ChimeraSlayer(inputFile, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());
+ //Parse sequences by group
+ parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+ vector<string> groups = parser->getNamesOfGroups();
+
+ for (int i = 0; i < groups.size(); i++) {
+ vector<Sequence> thisGroupsSeqs = parser->getSeqs(groups[i]);
+ map<string, string> thisGroupsMap = parser->getNameMap(groups[i]);
+ string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta";
+ priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile);
+ fileToPriority[newFastaFile] = priority;
+ fileGroup[newFastaFile] = groups[i];
+ }
}
- if (m->control_pressed) { delete chimera; return 0; }
- if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
-
- return (chimera->getLength());
+ return 0;
}
catch(exception& e) {
- m->errorOut(e, "ChimeraSlayerCommand", "setupChimera");
+ m->errorOut(e, "ChimeraSlayerCommand", "setUpForSelfReference");
exit(1);
}
}
-//**********************************************************************************************************************
+//**********************************************************************************************************************
string ChimeraSlayerCommand::getNamesFile(string& inputFile){
try {
string nameFile = "";
}
//**********************************************************************************************************************
-int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos, string fasta){
+int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string fasta, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup){
+ try {
+ int totalSeqs = 0;
+
+ for (map<string, map<string, int> >::iterator itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) {
+
+ if (m->control_pressed) { return 0; }
+
+ int start = time(NULL);
+ string thisFastaName = itFile->first;
+ map<string, int> thisPriority = itFile->second;
+ string thisoutputFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera";
+ string thisaccnosFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos";
+ string thistrimFastaFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta";
+
+ m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine();
+
+ lines.clear();
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ int proc = 1;
+ vector<unsigned long long> positions = m->divideFile(thisFastaName, proc);
+ lines.push_back(linePair(positions[0], positions[1]));
+#else
+ lines.push_back(linePair(0, 1000));
+#endif
+ int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority);
+
+ //append files
+ m->appendFiles(thisoutputFileName, outputFName); m->mothurRemove(thisoutputFileName);
+ m->appendFiles(thisaccnosFileName, accnos); m->mothurRemove(thisaccnosFileName);
+ if (trim) { m->appendFiles(thistrimFastaFileName, fasta); m->mothurRemove(thistrimFastaFileName); }
+ m->mothurRemove(thisFastaName);
+
+ totalSeqs += numSeqs;
+
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine();
+ }
+
+ return totalSeqs;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraSlayerCommand", "driverGroups");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accnos, string fasta, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup) {
try {
+ int process = 1;
+ int num = 0;
+ processIDS.clear();
+
+ if (fileToPriority.size() < processors) { processors = fileToPriority.size(); }
+
+ int groupsPerProcessor = fileToPriority.size() / processors;
+ int remainder = fileToPriority.size() % processors;
+
+ vector< map<string, map<string, int> > > breakUp;
+
+ for (int i = 0; i < processors; i++) {
+ map<string, map<string, int> > thisFileToPriority;
+ map<string, map<string, int> >::iterator itFile;
+ int count = 0;
+ int enough = groupsPerProcessor;
+ if (i == 0) { enough = groupsPerProcessor + remainder; }
+
+ for (itFile = fileToPriority.begin(); itFile != fileToPriority.end();) {
+ thisFileToPriority[itFile->first] = itFile->second;
+ fileToPriority.erase(itFile++);
+ count++;
+ if (count == enough) { break; }
+ }
+ breakUp.push_back(thisFileToPriority);
+ }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = outputFName + toString(getpid()) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processors;i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
+ m->openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); m->mothurRemove(tempFile);
+ }
+#else
+
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the slayerData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<slayerData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for(int i=1; i<processors; i++ ){
+ string extension = toString(i) + ".temp";
+ slayerData* tempslayer = new slayerData((outputFName + extension), (fasta + extension), (accnos + extension), templatefile, search, blastlocation, trimera, trim, realign, m, breakUp[i], fileGroup, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i);
+ pDataArray.push_back(tempslayer);
+ processIDS.push_back(i);
+
+ //MySlayerThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i-1] = CreateThread(NULL, 0, MySlayerGroupThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ num += pDataArray[i]->count;
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+#endif
+
+ //append output files
+ for(int i=0;i<processIDS.size();i++){
+ m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
+ m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+ m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+
+ if (trim) {
+ m->appendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta);
+ m->mothurRemove((fasta + toString(processIDS[i]) + ".temp"));
+ }
+ }
+
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraSlayerCommand", "createProcessesGroups");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string filename, string accnos, string fasta, map<string, int>& priority){
+ try {
+
+ Chimera* chimera;
+ if (templatefile != "self") { //you want to run slayer with a reference template
+ chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());
+ }else {
+ chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());
+ }
+
+ if (m->control_pressed) { delete chimera; return 0; }
+
+ if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+ templateSeqsLength = chimera->getLength();
+
ofstream out;
m->openOutputFile(outputFName, out);
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
- inFASTA.seekg(filePos->start);
+ inFASTA.seekg(filePos.start);
- if (filePos->start == 0) { chimera->printHeader(out); }
+ if (filePos.start == 0) { chimera->printHeader(out); }
bool done = false;
int count = 0;
while (!done) {
- if (m->control_pressed) { out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; }
+ if (m->control_pressed) { delete chimera; out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; }
Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
string candidateAligned = candidateSeq->getAligned();
//find chimeras
chimera->getChimeras(candidateSeq);
- if (m->control_pressed) { delete candidateSeq; return 1; }
+ if (m->control_pressed) { delete chimera; delete candidateSeq; return 1; }
//if you are not chimeric, then check each half
data_results wholeResults = chimera->getResults();
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
unsigned long long pos = inFASTA.tellg();
- if ((pos == -1) || (pos >= filePos->end)) { break; }
+ if ((pos == -1) || (pos >= filePos.end)) { break; }
#else
if (inFASTA.eof()) { break; }
#endif
//report progress
if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ int numNoParents = chimera->getNumNoParents();
+ if (numNoParents == count) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+
out.close();
out2.close();
if (trim) { out3.close(); }
inFASTA.close();
+ delete chimera;
return count;
+
+
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayerCommand", "driver");
}
//**********************************************************************************************************************
#ifdef USE_MPI
-int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector<unsigned long long>& MPIPos){
- try {
+int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector<unsigned long long>& MPIPos, string filename, map<string, int>& priority, bool byGroup){
+ try {
MPI_Status status;
int pid;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ Chimera* chimera;
+ if (templatefile != "self") { //you want to run slayer with a reference template
+ chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());
+ }else {
+ chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand(), byGroup);
+ }
+
+ if (m->control_pressed) { delete chimera; return 0; }
+
+ if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+ templateSeqsLength = chimera->getLength();
+
for(int i=0;i<num;i++){
- if (m->control_pressed) { return 1; }
+ if (m->control_pressed) { delete chimera; return 1; }
//read next sequence
int length = MPIPos[start+i+1] - MPIPos[start+i];
//find chimeras
chimera->getChimeras(candidateSeq);
- if (m->control_pressed) { delete candidateSeq; return 1; }
+ if (m->control_pressed) { delete chimera; delete candidateSeq; return 1; }
//if you are not chimeric, then check each half
data_results wholeResults = chimera->getResults();
//report progress
if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
-
+ int numNoParents = chimera->getNumNoParents();
+ if (numNoParents == num) { cout << "[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors." << endl; }
+
+ delete chimera;
return 0;
}
catch(exception& e) {
/**************************************************************************************************/
-int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta) {
+int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta, map<string, int>& thisPriority) {
try {
int process = 0;
int num = 0;
- int numNoParents = 0;
processIDS.clear();
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp");
+ num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", thisPriority);
//pass numSeqs to parent
ofstream out;
string tempFile = outputFileName + toString(getpid()) + ".num.temp";
m->openOutputFile(tempFile, out);
- out << num << '\t' << chimera->getNumNoParents() << endl;
+ out << num << endl;
out.close();
exit(0);
}else {
ifstream in;
string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
- if (!in.eof()) { int tempNum = 0; int tempNumParents = 0; in >> tempNum >> tempNumParents; num += tempNum; numNoParents += tempNumParents; }
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
in.close(); m->mothurRemove(tempFile);
}
#else
//Create processor worker threads.
for( int i=0; i<processors; i++ ){
string extension = toString(i) + ".temp";
- slayerData* tempslayer = new slayerData((outputFileName + extension), (fasta + extension), (accnos + extension), filename, templatefile, search, blastlocation, trimera, trim, realign, m, lines[i]->start, lines[i]->end, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i);
+ slayerData* tempslayer = new slayerData((outputFileName + extension), (fasta + extension), (accnos + extension), filename, templatefile, search, blastlocation, trimera, trim, realign, m, lines[i].start, lines[i].end, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i);
pDataArray.push_back(tempslayer);
processIDS.push_back(i);
//Close all thread handles and free memory allocations.
for(int i=0; i < pDataArray.size(); i++){
num += pDataArray[i]->count;
- numNoParents += pDataArray[i]->numNoParents;
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
#endif
- if (num == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
rename((accnos + toString(processIDS[0]) + ".temp").c_str(), accnos.c_str());
};
vector<int> processIDS; //processid
- vector<linePair*> lines;
+ vector<linePair> lines;
- int driver(linePair*, string, string, string, string);
- int createProcesses(string, string, string, string);
+ int driver(linePair, string, string, string, string, map<string, int>&);
+ int createProcesses(string, string, string, string, map<string, int>&);
int divideInHalf(Sequence, string&, string&);
map<string, int> sortFastaFile(string, string);
map<string, int> sortFastaFile(vector<Sequence>&, map<string, string>&, string newFile);
string getNamesFile(string&);
- int setupChimera(string, map<string, int>&);
- int MPIExecute(string, string, string, string);
+ //int setupChimera(string,);
+ int MPIExecute(string, string, string, string, map<string, int>&);
int deconvoluteResults(SequenceParser*, string, string, string);
map<string, int> priority;
+ int setUpForSelfReference(SequenceParser*&, map<string, string>&, map<string, map<string, int> >&, int);
+ int driverGroups(string, string, string, map<string, map<string, int> >&, map<string, string>&);
+ int createProcessesGroups(string, string, string, map<string, map<string, int> >&, map<string, string>&);
+ int MPIExecuteGroups(string, string, string, map<string, map<string, int> >&, map<string, string>&);
#ifdef USE_MPI
- int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
+ int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, string, map<string, int>&, bool);
#endif
bool abort, realign, trim, trimera, save;
string fastafile, groupfile, templatefile, outputDir, search, namefile, blastlocation;
int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
float divR;
- Chimera* chimera;
vector<string> outputNames;
vector<string> fastaFileNames;
int count;
int numNoParents;
int threadId;
+ map<string, map<string, int> > fileToPriority;
+ map<string, string> fileGroup;
slayerData(){}
slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long long st, unsigned long long en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
count = 0;
numNoParents = 0;
}
+ slayerData(string o, string fa, string ac, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, map<string, map<string, int> >& fPriority, map<string, string>& fileG, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
+ outputFName = o;
+ fasta = fa;
+ accnos = ac;
+ templatefile = te;
+ search = se;
+ blastlocation = bl;
+ trimera = tri;
+ trim = trm;
+ realign = re;
+ m = mout;
+ fileGroup = fileG;
+ fileToPriority = fPriority;
+ ksize = ks;
+ match = ma;
+ mismatch = mis;
+ window = win;
+ minSimilarity = minS;
+ minCoverage = minC;
+ minBS = miBS;
+ minSNP = minSN;
+ parents = par;
+ iters = it;
+ increment = inc;
+ numwanted = numw;
+ divR = div;
+ priority = prior;
+ threadId = tid;
+ count = 0;
+ numNoParents = 0;
+ }
+
};
/**************************************************************************************************/
if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); }
pDataArray->numNoParents = chimera->getNumNoParents();
+ if (pDataArray->numNoParents == count) { pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); }
+
out.close();
out2.close();
if (pDataArray->trim) { out3.close(); }
exit(1);
}
}
+
+/**************************************************************************************************/
+
+static DWORD WINAPI MySlayerGroupThreadFunction(LPVOID lpParam){
+ slayerData* pDataArray;
+ pDataArray = (slayerData*)lpParam;
+
+ try {
+
+ int totalSeqs = 0;
+
+ for (map<string, map<string, int> >::iterator itFile = pDataArray->fileToPriority.begin(); itFile != pDataArray->fileToPriority.end(); itFile++) {
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ int start = time(NULL);
+ string thisFastaName = itFile->first;
+ map<string, int> thisPriority = itFile->second;
+ string thisoutputFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.chimera";
+ string thisaccnosFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.accnos";
+ string thistrimFastaFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.fasta";
+
+ pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group: " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine();
+
+ //int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority);
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ ofstream out;
+ pDataArray->m->openOutputFile(thisoutputFileName, out);
+
+ ofstream out2;
+ pDataArray->m->openOutputFile(thisaccnosFileName, out2);
+
+ ofstream out3;
+ if (pDataArray->trim) { pDataArray->m->openOutputFile(thistrimFastaFileName, out3); }
+
+ ifstream inFASTA;
+ pDataArray->m->openInputFile(thisFastaName, inFASTA);
+
+ Chimera* chimera;
+ chimera = new ChimeraSlayer(thisFastaName, pDataArray->templatefile, pDataArray->trim, thisPriority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);
+ chimera->printHeader(out);
+
+ int numSeqs = 0;
+
+ if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 0; }
+
+ if (chimera->getUnaligned()) {
+ pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine();
+ out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
+ delete chimera;
+ return 0;
+ }
+ int templateSeqsLength = chimera->getLength();
+
+ bool done = false;
+ while (!done) {
+
+ if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1; }
+
+ Sequence* candidateSeq = new Sequence(inFASTA); pDataArray->m->gobble(inFASTA);
+ string candidateAligned = candidateSeq->getAligned();
+
+ if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+ if (candidateSeq->getAligned().length() != templateSeqsLength) {
+ pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
+ }else{
+ //find chimeras
+ chimera->getChimeras(candidateSeq);
+
+ if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete candidateSeq; delete chimera; return 1; }
+
+ //if you are not chimeric, then check each half
+ data_results wholeResults = chimera->getResults();
+
+ //determine if we need to split
+ bool isChimeric = false;
+
+ if (wholeResults.flag == "yes") {
+ string chimeraFlag = "no";
+ if( (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
+ ||
+ (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
+
+
+ if (chimeraFlag == "yes") {
+ if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
+ }
+ }
+
+ if ((!isChimeric) && pDataArray->trimera) {
+
+ //split sequence in half by bases
+ string leftQuery, rightQuery;
+ Sequence tempSeq(candidateSeq->getName(), candidateAligned);
+ //divideInHalf(tempSeq, leftQuery, rightQuery);
+ string queryUnAligned = tempSeq.getUnaligned();
+ int numBases = int(queryUnAligned.length() * 0.5);
+
+ string queryAligned = tempSeq.getAligned();
+ leftQuery = tempSeq.getAligned();
+ rightQuery = tempSeq.getAligned();
+
+ int baseCount = 0;
+ int leftSpot = 0;
+ for (int i = 0; i < queryAligned.length(); i++) {
+ //if you are a base
+ if (isalpha(queryAligned[i])) {
+ baseCount++;
+ }
+
+ //if you have half
+ if (baseCount >= numBases) { leftSpot = i; break; } //first half
+ }
+
+ //blank out right side
+ for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
+
+ //blank out left side
+ for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
+
+ //run chimeraSlayer on each piece
+ Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
+ Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
+
+ //find chimeras
+ chimera->getChimeras(left);
+ data_results leftResults = chimera->getResults();
+
+ chimera->getChimeras(right);
+ data_results rightResults = chimera->getResults();
+
+ //if either piece is chimeric then report
+ Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
+ if (pDataArray->trim) { trimmed.printSequence(out3); }
+
+ delete left; delete right;
+
+ }else { //already chimeric
+ //print results
+ Sequence trimmed = chimera->print(out, out2);
+ if (pDataArray->trim) { trimmed.printSequence(out3); }
+ }
+
+
+ }
+ numSeqs++;
+ }
+
+ delete candidateSeq;
+
+ if (inFASTA.eof()) { break; }
+
+ //report progress
+ if((numSeqs) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(numSeqs)); pDataArray->m->mothurOutEndLine(); }
+ }
+ //report progress
+ if((numSeqs) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(numSeqs)); pDataArray->m->mothurOutEndLine(); }
+
+ pDataArray->numNoParents = chimera->getNumNoParents();
+ if (pDataArray->numNoParents == numSeqs) { pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); }
+
+ out.close();
+ out2.close();
+ if (pDataArray->trim) { out3.close(); }
+ inFASTA.close();
+ delete chimera;
+
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ //append files
+ pDataArray->m->appendFiles(thisoutputFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(thisoutputFileName);
+ pDataArray->m->appendFiles(thisaccnosFileName, pDataArray->accnos); pDataArray->m->mothurRemove(thisaccnosFileName);
+ if (pDataArray->trim) { pDataArray->m->appendFiles(thistrimFastaFileName, pDataArray->fasta); pDataArray->m->mothurRemove(thistrimFastaFileName); }
+ pDataArray->m->mothurRemove(thisFastaName);
+
+ totalSeqs += numSeqs;
+
+ pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine();
+ }
+
+ pDataArray->count = totalSeqs;
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerGroupThreadFunction");
+ exit(1);
+ }
+}
+
#endif
/**************************************************************************************************/
if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
nameFile = nameFileNames[s];
}else { nameFile = getNamesFile(fastaFileNames[s]); }
-
+
map<string, string> seqs;
readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
}else{
- cout << processors << '\t' << distName.size() << endl;
+ //cout << processors << '\t' << distName.size() << endl;
vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
dividedNames.resize(processors);
//for each file group figure out which process will complete it
//want to divide the load intelligently so the big files are spread between processes
for (int i = 0; i < distName.size(); i++) {
- cout << i << endl;
+ //cout << i << endl;
int processToAssign = (i+1) % processors;
if (processToAssign == 0) { processToAssign = processors; }
//not lets reverse the order of ever other process, so we balance big files running with little ones
for (int i = 0; i < processors; i++) {
- cout << i << endl;
+ //cout << i << endl;
int remainder = ((i+1) % processors);
if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); }
}
commands["pre.cluster"] = "pre.cluster";
commands["pcoa"] = "pcoa";
commands["otu.hierarchy"] = "otu.hierarchy";
- commands["set.dir"] = "set.dir";
+ commands["set.dir"] = "MPIEnabled";
commands["merge.files"] = "merge.files";
commands["parse.list"] = "parse.list";
commands["set.logfile"] = "set.logfile";
commands["anosim"] = "anosim";
commands["make.fastq"] = "make.fastq";
commands["merge.groups"] = "merge.groups";
- commands["get.current"] = "get.current";
- commands["set.current"] = "set.current";
+ commands["get.current"] = "MPIEnabled";
+ commands["set.current"] = "MPIEnabled";
commands["get.commandinfo"] = "get.commandinfo";
commands["deunique.tree"] = "deunique.tree";
commands["count.seqs"] = "count.seqs";
#include "sharedbraycurtis.h"
//**********************************************************************************************************************
-HeatMapSim::HeatMapSim(string dir, string i) : outputDir(dir), inputfile(i) {
+HeatMapSim::HeatMapSim(string dir, string i, int f) : outputDir(dir), inputfile(i), fontSize(f) {
m = MothurOut::getInstance();
}
//**********************************************************************************************************************
if (m->control_pressed) { return outputNames; }
- string filenamesvg = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + calcs[k]->getName() + ".heatmap.sim.svg";
+ string filenamesvg = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + calcs[k]->getName() + ".heatmap.sim.svg";
m->openOutputFile(filenamesvg, outsvg);
outputNames.push_back(filenamesvg);
//white backround
outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"" + toString((lookup.size() * 150) + 160) + "\" height=\"" + toString((lookup.size() * 150) + 160) + "\"/>";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((lookup.size() * 75) - 40) + "\" y=\"25\">Heatmap at distance " + lookup[0]->getLabel() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" x=\"" + toString((lookup.size() * 75) - 40) + "\" y=\"25\">Heatmap at distance " + lookup[0]->getLabel() + "</text>\n";
//column labels
for (int h = 0; h < lookup.size(); h++) {
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(((150 * (h+1)) ) - ((int)lookup[h]->getGroup().length() / 2)) + "\" y=\"50\">" + lookup[h]->getGroup() + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" y=\"" + toString(((150 * (h+1)) ) - ((int)lookup[h]->getGroup().length() / 2)) + "\" x=\"50\">" + lookup[h]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" x=\"" + toString(((150 * (h+1)) ) - ((int)lookup[h]->getGroup().length() / 2)) + "\" y=\"50\">" + lookup[h]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" y=\"" + toString(((150 * (h+1)) ) - ((int)lookup[h]->getGroup().length() / 2)) + "\" x=\"50\">" + lookup[h]->getGroup() + "</text>\n";
}
sims.clear();
//white backround
outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"" + toString((dists.size() * 150) + 160) + "\" height=\"" + toString((dists.size() * 150) + 160) + "\"/>";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((dists.size() * 75) - 40) + "\" y=\"25\">Heatmap for " + inputfile + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" x=\"" + toString((dists.size() * 75) - 40) + "\" y=\"25\">Heatmap for " + inputfile + "</text>\n";
//column labels
for (int h = 0; h < groups.size(); h++) {
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(((150 * (h+1)) ) - ((int)groups[h].length() / 2)) + "\" y=\"50\">" + groups[h] + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" y=\"" + toString(((150 * (h+1)) ) - ((int)groups[h].length() / 2)) + "\" x=\"50\">" + groups[h] + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" x=\"" + toString(((150 * (h+1)) ) - ((int)groups[h].length() / 2)) + "\" y=\"50\">" + groups[h] + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" y=\"" + toString(((150 * (h+1)) ) - ((int)groups[h].length() / 2)) + "\" x=\"50\">" + groups[h] + "</text>\n";
}
double biggest = -1;
label /= 1000.0;
string text = toString(label, 1);
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(x) + "\" y=\"" + toString(y-3) + "\">" + text + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" x=\"" + toString(x) + "\" y=\"" + toString(y-3) + "\">" + text + "</text>\n";
x += 153;
}
}
class HeatMapSim {
public:
- HeatMapSim(string, string);
+ HeatMapSim(string, string, int);
~HeatMapSim(){};
vector<string> getPic(vector<SharedRAbundVector*>, vector<Calculator*>);
void printLegend(int, float);
string format, groupComb, outputDir, inputfile;
+ int fontSize;
ofstream outsvg;
MothurOut* m;
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pcalc("calc", "Multiple", "jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-morisitahorn-braycurtis", "jest-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
+ CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "",false,false); parameters.push_back(pfontsize);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
try {
string helpString = "";
ValidCalculators validCalculator;
- helpString += "The heatmap.sim command parameters are shared, phylip, column, name, groups, calc and label. shared or phylip or column and name are required unless valid current files exist.\n";
+ helpString += "The heatmap.sim command parameters are shared, phylip, column, name, groups, calc, fontsize and label. shared or phylip or column and name are required unless valid current files exist.\n";
helpString += "There are two ways to use the heatmap.sim command. The first is with the read.otu command. \n";
helpString += "With the read.otu command you may use the groups, label and calc parameters. \n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n";
helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n";
+ helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n";
helpString += "The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, label=yourLabels).\n";
helpString += "Example heatmap.sim(groups=A-B-C, calc=jabund).\n";
helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
outputTypes["svg"] = tempOutNames;
format = "";
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
+
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
}
}
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
m->setGroups(Groups);
}
+ string temp = validParameter.validFile(parameters, "fontsize", false); if (temp == "not found") { temp = "24"; }
+ convert(temp, fontsize);
if (abort == false) {
ValidCalculators validCalculator;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- heatmap = new HeatMapSim(outputDir, inputfile);
+ heatmap = new HeatMapSim(outputDir, inputfile, fontsize);
if (format == "shared") {
runCommandShared();
set<string> labels; //holds labels to be used
string format, groups, label, calc, sharedfile, phylipfile, columnfile, namefile, outputDir, inputfile;
vector<string> Estimators, Groups, outputNames;
+ int fontsize;
int runCommandShared();
int runCommandDist();
if (!in.eof()) { in >> junk; }
else { break; }
}
-
+ //m->getline(in);
while(!in.eof()){
if (m->control_pressed) { in.close(); return 0; }
in >> name; //read from first column
-
+ //m->getline(in);
//read rest
for (int i = 0; i < 15; i++) {
if (!in.eof()) { in >> junk; }
//save end pos
filePos.push_back(size);
-
+
//sanity check filePos
for (int i = 0; i < (filePos.size()-1); i++) {
if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
}
/***********************************************************************/
-void MothurOut::mothurRemove(string filename){
+int MothurOut::mothurRemove(string filename){
try {
filename = getFullPathName(filename);
- remove(filename.c_str());
+ int error = remove(filename.c_str());
+ //if (error != 0) {
+ // if (errno != ENOENT) { //ENOENT == file does not exist
+ // string message = "Error deleting file " + filename;
+ // perror(message.c_str());
+ // }
+ //}
+ return error;
}
catch(exception& e) {
errorOut(e, "MothurOut", "mothurRemove");
int readNames(string, map<string, string>&);
int readNames(string, map<string, vector<string> >&);
int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
- void mothurRemove(string);
+ int mothurRemove(string);
//searchs and checks
int numFastaSeqs = 0;
set<string> badSeqNames;
int start = time(NULL);
-
+
#ifdef USE_MPI
int pid, numSeqsPerProcessor;
int tag = 2001;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
+
MPI_File inMPI;
MPI_File outMPIGood;
MPI_File outMPIBadAccnos;
#ifdef USE_MPI
+ int pid;
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
- if (pid == 0) { //only one process should fix files
+ if (pid == 0) {
driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
- }
-
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
#else
int numSeqs = 0;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); }
}
+#ifdef USE_MPI
+ }
+
+ MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+
+ if (pid == 0) {
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&startPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ MPI_Send(&endPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ MPI_Send(&maxAmbig, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ MPI_Send(&maxHomoP, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ MPI_Send(&minLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ MPI_Send(&maxLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ }
+ }else {
+ MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ MPI_Recv(&endPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ MPI_Recv(&maxAmbig, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ MPI_Recv(&maxHomoP, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ MPI_Recv(&minLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ MPI_Recv(&maxLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+#endif
return 0;
}
catch(exception& e) {
commandFactory = CommandFactory::getInstance();
+ string tag = "";
+#ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ tag = toString(pid);
+#endif
+
m->mothurOut("Mothur's directories:"); m->mothurOutEndLine();
//redirect output
//test to make sure directory exists
output = m->getFullPathName(output);
- string outTemp = output + "temp";
+ string outTemp = output + tag + "temp";
ofstream out;
out.open(outTemp.c_str(), ios::trunc);
if(!out) {
//test to make sure directory exists
input = m->getFullPathName(input);
- string inTemp = input + "temp";
+ string inTemp = input + tag + "temp";
ofstream in;
in.open(inTemp.c_str(), ios::trunc);
if(!in) {