int start = time(NULL);
#ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
+ int pid, numSeqsPerProcessor;
int tag = 2001;
vector<unsigned long int> MPIPos;
MPIWroteAccnos = false;
}
//***************************************************************************************************************
-int Bellerophon::print(ostream& out, ostream& outAcc) {
+int Bellerophon::print(ostream& out, ostream& outAcc, string s) {
try {
int above1 = 0;
}
#ifdef USE_MPI
//***************************************************************************************************************
-int Bellerophon::print(MPI_File& out, MPI_File& outAcc) {
+int Bellerophon::print(MPI_File& out, MPI_File& outAcc, string s) {
try {
int pid;
int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
try {
- //float dme = 0.0;
SeqMap::iterator itR;
SeqMap::iterator itL;
~Bellerophon() { delete distCalculator; for (int i = 0; i < seqs.size(); i++) { delete seqs[i]; } seqs.clear(); }
int getChimeras();
- int print(ostream&, ostream&);
+ int print(ostream&, ostream&, string);
#ifdef USE_MPI
- int print(MPI_File&, MPI_File&);
+ int print(MPI_File&, MPI_File&, string);
#endif
private:
#endif
}
//***************************************************************************************************************
-int Ccode::print(ostream& out, ostream& outAcc) {
+Sequence* Ccode::print(ostream& out, ostream& outAcc) {
try {
ofstream out2;
//free memory
for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; }
- return results;
+ return NULL;
}
catch(exception& e) {
m->errorOut(e, "Ccode", "print");
}
#ifdef USE_MPI
//***************************************************************************************************************
-int Ccode::print(MPI_File& out, MPI_File& outAcc) {
+Sequence* Ccode::print(MPI_File& out, MPI_File& outAcc) {
try {
string outMapString = "";
//free memory
for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; }
- return results;
+ return NULL;
}
catch(exception& e) {
m->errorOut(e, "Ccode", "print");
MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status);
delete buf;
+
+ return 0;
}
catch(exception& e) {
~Ccode();
int getChimeras(Sequence* query);
- int print(ostream&, ostream&);
+ Sequence* print(ostream&, ostream&);
#ifdef USE_MPI
- int print(MPI_File&, MPI_File&);
+ Sequence* print(MPI_File&, MPI_File&);
#endif
private:
virtual void printHeader(ostream&){};
virtual int getChimeras(Sequence*){ return 0; }
virtual int getChimeras(){ return 0; }
- virtual int print(ostream&, ostream&){ return 0; }
+ virtual Sequence* print(ostream&, ostream&){ return NULL; }
+ virtual int print(ostream&, ostream&, string){ return 0; }
#ifdef USE_MPI
- virtual int print(MPI_File&, MPI_File&){ return 0; }
+ virtual Sequence* print(MPI_File&, MPI_File&){ return 0; }
+ virtual int print(MPI_File&, MPI_File&, string){ return 0; }
#endif
vector<string> tempOutNames;
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ChimeraBellerophonCommand", "ChimeraBellerophonCommand");
vector<string> tempOutNames;
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
MPI_File_open(MPI_COMM_WORLD, FileName, outMode, MPI_INFO_NULL, &outMPI); //comm, filename, mode, info, filepointer
MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
- numSeqs = chimera->print(outMPI, outMPIAccnos);
+ numSeqs = chimera->print(outMPI, outMPIAccnos, "");
MPI_File_close(&outMPI);
MPI_File_close(&outMPIAccnos);
ofstream out2;
m->openOutputFile(accnosFileName, out2);
- numSeqs = chimera->print(out, out2);
+ numSeqs = chimera->print(out, out2, "");
out.close();
out2.close();
#ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
+ int pid, numSeqsPerProcessor;
int tag = 2001;
vector<unsigned long int> MPIPos;
if (m->control_pressed) { delete candidateSeq; return 1; }
//print results
- bool isChimeric = chimera->print(outMPI, outAccMPI);
+ chimera->print(outMPI, outAccMPI);
}
}
delete candidateSeq;
#ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
+ int pid, numSeqsPerProcessor;
int tag = 2001;
vector<unsigned long int> MPIPos;
}
}
//***************************************************************************************************************
-int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
+Sequence* ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
try {
m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine();
}
}
- return 0;
+ return NULL;
}
catch(exception& e) {
m->errorOut(e, "ChimeraCheckRDP", "print");
}
#ifdef USE_MPI
//***************************************************************************************************************
-int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) {
+Sequence* ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) {
try {
cout << "Processing: " << querySeq->getName() << endl;
}
}
- return 0;
+ return NULL;
}
catch(exception& e) {
m->errorOut(e, "ChimeraCheckRDP", "print");
~ChimeraCheckRDP();
int getChimeras(Sequence*);
- int print(ostream&, ostream&);
+ Sequence* print(ostream&, ostream&);
#ifdef USE_MPI
- int print(MPI_File&, MPI_File&);
+ Sequence* print(MPI_File&, MPI_File&);
#endif
private:
templateSeqsLength = chimera->getLength();
#ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
+ int pid, numSeqsPerProcessor;
int tag = 2001;
vector<unsigned long int> MPIPos;
if (m->control_pressed) { delete candidateSeq; return 1; }
//print results
- bool isChimeric = chimera->print(outMPI, outAccMPI);
+ chimera->print(outMPI, outAccMPI);
}
}
delete candidateSeq;
#include "blastdb.hpp"
//***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div,
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
try {
fastafile = file;
increment = inc;
numWanted = numw;
realign = r;
+ trimChimera = trim;
decalc = new DeCalculator();
}
}
//***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, string abunds, int k, int ms, int mms, int win, float div,
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div,
int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
try {
fastafile = file; templateSeqs = readSeqs(fastafile);
numWanted = numw;
realign = r;
includeAbunds = abunds;
+ trimChimera = trim;
//read name file and create nameMapRank
readNameFile(name);
out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
}
//***************************************************************************************************************
-int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
+Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
try {
+ Sequence* trim = NULL;
+ if (trimChimera) { trim = new Sequence(trimQuery->getName(), trimQuery->getAligned()); }
+
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
outAcc << querySeq->getName() << endl;
+
+ if (trimChimera) {
+ int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
+ int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
+
+ string newAligned = trim->getAligned();
+
+ if (lengthLeft > lengthRight) { //trim right
+ for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ }else { //trim left
+ for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
+ }
+ trim->setAligned(newAligned);
+ }
+
}
}
out << endl;
}else { out << querySeq->getName() << "\tno" << endl; }
- return 0;
+ return trim;
}
catch(exception& e) {
}
#ifdef USE_MPI
//***************************************************************************************************************
-int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
+Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
try {
MPI_Status status;
bool results = false;
string outAccString = "";
string outputString = "";
+ Sequence* trim = NULL;
+ if (trimChimera) { trim = new Sequence(trimQuery->getName(), trimQuery->getAligned()); }
+
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
delete buf2;
+
+ if (trimChimera) {
+ int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
+ int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
+
+ string newAligned = trim->getAligned();
+ if (lengthLeft > lengthRight) { //trim right
+ for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ }else { //trim left
+ for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+ }
+ trim->setAligned(newAligned);
+ }
}
}
}
- return results;
+ return trim;
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayer", "print");
//***************************************************************************************************************
int ChimeraSlayer::getChimeras(Sequence* query) {
try {
+ trimQuery = query;
+
chimeraFlags = "no";
//filter query
class ChimeraSlayer : public Chimera {
public:
- ChimeraSlayer(string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
- ChimeraSlayer(string, string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
+ ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
+ ChimeraSlayer(string, string, bool, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
~ChimeraSlayer();
int getChimeras(Sequence*);
- int print(ostream&, ostream&);
+ Sequence* print(ostream&, ostream&);
void printHeader(ostream&);
int doPrep();
#ifdef USE_MPI
- int print(MPI_File&, MPI_File&);
+ Sequence* print(MPI_File&, MPI_File&);
#endif
private:
Sequence* querySeq;
+ Sequence* trimQuery;
DeCalculator* decalc;
map<int, int> spotMap;
Database* databaseRight;
vector<data_struct> chimeraResults;
string chimeraFlags, searchMethod, fastafile, includeAbunds;
- bool realign;
+ bool realign, trimChimera;
int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment;
float divR;
//**********************************************************************************************************************
vector<string> ChimeraSlayerCommand::getValidParameters(){
try {
- string AlignArray[] = {"fasta", "processors", "name","window", "include","template","numwanted", "ksize", "match","mismatch",
+ string AlignArray[] = {"fasta", "processors","trim", "name","window", "include","template","numwanted", "ksize", "match","mismatch",
"divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
return myArray;
vector<string> tempOutNames;
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
else {
//valid paramters for this command
- string Array[] = {"fasta", "processors","name", "include","window", "template","numwanted", "ksize", "match","mismatch",
+ string Array[] = {"fasta", "processors","name", "include","trim", "window", "template","numwanted", "ksize", "match","mismatch",
"divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
vector<string> tempOutNames;
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
temp = validParameter.validFile(parameters, "realign", false); if (temp == "not found") { temp = "f"; }
realign = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found") { temp = "f"; }
+ trim = m->isTrue(temp);
+
search = validParameter.validFile(parameters, "search", false); if (search == "not found") { search = "distance"; }
temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "100"; }
m->mothurOut("The chimera.slayer command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
m->mothurOut("This command was modeled after the chimeraSlayer written by the Broad Institute.\n");
- m->mothurOut("The chimera.slayer command parameters are fasta, name, template, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
+ m->mothurOut("The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
m->mothurOut("The name parameter allows you to provide a name file, if you are using template=self. \n");
m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n");
#ifdef USE_MPI
m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
#endif
+ m->mothurOut("The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest peice, default=F. \n");
m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=50. \n");
m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n");
m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n");
int start = time(NULL);
if (templatefile != "self") { //you want to run slayer with a refernce template
- chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
+ chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
}else {
if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
- chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
+ chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
}else {
m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
string nameFile = filenames["name"][0];
fastaFileNames[s] = filenames["fasta"][0];
- chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
+ chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
}
}
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimeras";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera";
string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos";
+ string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta";
if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
templateSeqsLength = chimera->getLength();
#ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
+ int pid, numSeqsPerProcessor;
int tag = 2001;
vector<unsigned long int> MPIPos;
MPI_File inMPI;
MPI_File outMPI;
MPI_File outMPIAccnos;
+ MPI_File outMPIFasta;
int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
int inMode=MPI_MODE_RDONLY;
char outAccnosFilename[1024];
strcpy(outAccnosFilename, accnosFileName.c_str());
+
+ char outFastaFilename[1024];
+ strcpy(outFastaFilename, trimFastaFileName.c_str());
char inFileName[1024];
strcpy(inFileName, fastaFileNames[s].c_str());
MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+ if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
if (pid == 0) { //you are the root process
m->mothurOutEndLine();
if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
//do your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; }
}else{ //you are a child process
MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
//do your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
}
//close files
MPI_File_close(&inMPI);
MPI_File_close(&outMPI);
- MPI_File_close(&outMPIAccnos);
+ MPI_File_close(&outMPIAccnos);
+ if (trim) { MPI_File_close(&outMPIFasta); }
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
//break up file
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
- if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
}else{
processIDS.resize(0);
- numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
+ if (trim) { rename((trimFastaFileName + toString(processIDS[0]) + ".temp").c_str(), trimFastaFileName.c_str()); }
//append output files
for(int i=1;i<processors;i++){
remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
}
- if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
+ if (trim) {
+ for(int i=1;i<processors;i++){
+ m->appendFiles((trimFastaFileName + toString(processIDS[i]) + ".temp"), trimFastaFileName);
+ remove((trimFastaFileName + toString(processIDS[i]) + ".temp").c_str());
+ }
+ }
+
+ if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
}
#else
- numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
- if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
#endif
outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
+ if (trim) { outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }
m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
}
}
//**********************************************************************************************************************
-int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
+int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos, string fasta){
try {
ofstream out;
m->openOutputFile(outputFName, out);
ofstream out2;
m->openOutputFile(accnos, out2);
+ ofstream out3;
+ if (trim) { m->openOutputFile(fasta, out3); }
+
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
while (!done) {
- if (m->control_pressed) { return 1; }
+ if (m->control_pressed) { out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; }
Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
if (m->control_pressed) { delete candidateSeq; return 1; }
//print results
- chimera->print(out, out2);
+ Sequence* trimmed = chimera->print(out, out2);
+
+ if (trim) { trimmed->printSequence(out3); delete trimmed; }
}
count++;
}
out.close();
out2.close();
+ if (trim) { out3.close(); }
inFASTA.close();
return count;
}
//**********************************************************************************************************************
#ifdef USE_MPI
-int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
+int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector<unsigned long int>& MPIPos){
try {
MPI_Status status;
int pid;
chimera->getChimeras(candidateSeq);
if (m->control_pressed) { delete candidateSeq; return 1; }
- //cout << "about to print" << endl;
+
//print results
- bool isChimeric = chimera->print(outMPI, outAccMPI);
+ Sequence* trimmed = chimera->print(outMPI, outAccMPI);
+
+ if (trim) {
+ string outputString = ">" + trimmed->getName() + "\n" + trimmed->getAligned() + "\n";
+
+ //write to accnos file
+ int length = outputString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outputString.c_str(), length);
+
+ MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status);
+ delete buf2;
+ }
+
}
}
delete candidateSeq;
/**************************************************************************************************/
-int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos) {
+int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+ num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp");
//pass numSeqs to parent
ofstream out;
vector<int> processIDS; //processid
vector<linePair*> lines;
- int driver(linePair*, string, string, string);
- int createProcesses(string, string, string);
+ int driver(linePair*, string, string, string, string);
+ int createProcesses(string, string, string, string);
#ifdef USE_MPI
- int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
+ int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
#endif
- bool abort, realign;
+ bool abort, realign, trim;
string fastafile, templatefile, outputDir, search, namefile, includeAbunds;
int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
float divR;
for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
#ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
+ int pid, numSeqsPerProcessor;
int tag = 2001;
vector<unsigned long int> MPIPos;
//for each file group figure out which process will complete it
//want to divide the load intelligently so the big files are spread between processes
- int count = 1;
for (int i = 0; i < distName.size(); i++) {
int processToAssign = (i+1) % processors;
if (processToAssign == 0) { processToAssign = processors; }
mout->executing = true;
#ifdef USE_MPI
int pid, numProcesses;
- MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
MPI_Comm_size(MPI_COMM_WORLD, &numProcesses);
string filteredFasta = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])) + "filter.fasta";
#ifdef USE_MPI
- int pid, start, end, numSeqsPerProcessor, num;
+ int pid, numSeqsPerProcessor, num;
int tag = 2001;
vector<unsigned long int>MPIPos;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
MPI_File outMPI;
- MPI_File tempMPI;
MPI_File inMPI;
int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
int inMode=MPI_MODE_RDONLY;
MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
- int startTime = time(NULL);
+
string outputString = "";
size = 0;
MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
- int startTime = time(NULL);
+
string outputString = "";
size = 0;
}
}
//***************************************************************************************************************
-int Pintail::print(ostream& out, ostream& outAcc) {
+Sequence* Pintail::print(ostream& out, ostream& outAcc) {
try {
+
int index = ceil(deviation);
//is your DE value higher than the 95%
for (int m = 0; m < expectedDistance.size(); m++) { out << expectedDistance[m] << '\t'; }
out << endl;
- return 0;
+ return NULL;
}
catch(exception& e) {
}
#ifdef USE_MPI
//***************************************************************************************************************
-int Pintail::print(MPI_File& out, MPI_File& outAcc) {
+Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) {
try {
- bool results = false;
+
string outputString = "";
int index = ceil(deviation);
MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
delete buf;
- results = true;
+ return NULL;
}
outputString += "Observed\t";
MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
delete buf2;
- return results;
+ return NULL;
}
catch(exception& e) {
m->errorOut(e, "Pintail", "print");
~Pintail();
int getChimeras(Sequence*);
- int print(ostream&, ostream&);
+ Sequence* print(ostream&, ostream&);
void setCons(string c) { consfile = c; }
void setQuantiles(string q) { quanfile = q; }
#ifdef USE_MPI
- int print(MPI_File&, MPI_File&);
+ Sequence* print(MPI_File&, MPI_File&);
#endif
private:
int start = time(NULL);
#ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
+ int pid, numSeqsPerProcessor;
int tag = 2001;
vector<unsigned long int> MPIPos;
//flip it so you can print it
int count = 0;
for (int r=0; r<globaldata->Groups.size(); r++) {
- for (int l = r+1; l < globaldata->Groups.size(); l++) {
+ for (int l = 0; l < r; l++) {
dists[r][l] = utreeScores[count][0];
dists[l][r] = utreeScores[count][0];
count++;
//flip it so you can print it
for (int r=0; r<globaldata->Groups.size(); r++) {
- for (int l = r+1; l < globaldata->Groups.size(); l++) {
+ for (int l = 0; l < r; l++) {
dists[r][l] = utreeScores[count];
dists[l][r] = utreeScores[count];
count++;