A7D216061199C47F00F13F13 /* catchallcommand.h */,
A7D216071199C47F00F13F13 /* catchallcommand.cpp */,
A7DA2017113FECD400BF472F /* chimeraseqscommand.cpp */,
- A78434881162224F00100BE0 /* chimeraccodecommand.h */,
A7DA2018113FECD400BF472F /* chimeraseqscommand.h */,
A7825502116519F70002E2DD /* chimerabellerophoncommand.h */,
A7825503116519F70002E2DD /* chimerabellerophoncommand.cpp */,
A747E79B1163442A00FB9042 /* chimeracheckcommand.h */,
A747E79C1163442A00FB9042 /* chimeracheckcommand.cpp */,
+ A78434881162224F00100BE0 /* chimeraccodecommand.h */,
A78434891162224F00100BE0 /* chimeraccodecommand.cpp */,
A78254461164D7790002E2DD /* chimerapintailcommand.h */,
A78254471164D7790002E2DD /* chimerapintailcommand.cpp */,
int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
int inMode=MPI_MODE_RDONLY;
- //char* outAlignFilename = new char[alignFileName.length()];
- //memcpy(outAlignFilename, alignFileName.c_str(), alignFileName.length());
-
char outAlignFilename[1024];
strcpy(outAlignFilename, alignFileName.c_str());
-
- //char* outReportFilename = new char[reportFileName.length()];
- //memcpy(outReportFilename, reportFileName.c_str(), reportFileName.length());
char outReportFilename[1024];
strcpy(outReportFilename, reportFileName.c_str());
-
- //char* outAccnosFilename = new char[accnosFileName.length()];
- //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
char outAccnosFilename[1024];
strcpy(outAccnosFilename, accnosFileName.c_str());
-
- //char* inFileName = new char[candidateFileNames[s].length()];
- //memcpy(inFileName, candidateFileNames[s].c_str(), candidateFileNames[s].length());
char inFileName[1024];
strcpy(inFileName, candidateFileNames[s].c_str());
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("fasta");
- //user has given a template file
- if(it != parameters.end()){
- path = hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["fasta"] = inputDir + it->second; }
+
+ fastafile = validParameter.validFile(parameters, "fasta", false);
+ if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.bellerophon command."); m->mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(fastafile, fastaFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+ if (inputDir != "") {
+ string path = hasPath(fastaFileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ #endif
+
+ ableToOpen = openInputFile(fastaFileNames[i], in);
+ in.close();
+
+ #ifdef USE_MPI
+ for (int j = 1; j < processors; j++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Status status;
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+
+ #endif
+
+ if (ableToOpen == 1) {
+ m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ fastaFileNames.erase(fastaFileNames.begin()+i);
+ i--;
+ }
+
}
+
+ //make sure there is at least one valid file left
+ if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
}
-
-
- //check for required parameters
- fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
- else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.bellerophon command."); m->mothurOutEndLine(); abort = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
try {
m->mothurOut("The chimera.bellerophon command reads a fastafile and creates list of potentially chimeric sequences.\n");
m->mothurOut("The chimera.bellerophon command parameters are fasta, filter, correction, processors, window, increment. The fasta parameter is required.\n");
+ m->mothurOut("The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter, default=false. \n");
m->mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs, default=true.\n");
m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default is 1/4 sequence length. \n");
m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default is 25.\n");
m->mothurOut("chimera.bellerophon(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors) \n");
- m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, window=200) \n");
+ m->mothurOut("Example: chimera.bellerophon(fasta=AD.align, filter=True, correction=true, window=200) \n");
m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
}
catch(exception& e) {
if (abort == true) { return 0; }
- int start = time(NULL);
-
- chimera = new Bellerophon(fastafile, filter, correction, window, increment, processors, outputDir);
-
- string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "bellerophon.chimeras";
- string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "bellerophon.accnos";
- bool hasAccnos = true;
-
- chimera->getChimeras();
-
- if (m->control_pressed) { delete chimera; return 0; }
-
- #ifdef USE_MPI
- MPI_File outMPI;
- MPI_File outMPIAccnos;
-
- int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
-
- //char* outFilename = new char[accnosFileName.length()];
- //memcpy(outFilename, accnosFileName.c_str(), accnosFileName.length());
-
- char outFilename[1024];
- strcpy(outFilename, accnosFileName.c_str());
-
- //char* FileName = new char[outputFileName.length()];
- //memcpy(FileName, outputFileName.c_str(), outputFileName.length());
-
- char FileName[1024];
- strcpy(FileName, outputFileName.c_str());
-
- MPI_File_open(MPI_COMM_WORLD, FileName, outMode, MPI_INFO_NULL, &outMPI); //comm, filename, mode, info, filepointer
- MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
-
- //delete FileName;
- //delete outFilename;
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+
+ m->mothurOut("Checking sequences from " + fastaFileNames[i] + " ..." ); m->mothurOutEndLine();
+
+ int start = time(NULL);
+
+ chimera = new Bellerophon(fastaFileNames[i], filter, correction, window, increment, processors, outputDir);
+
+ string outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "bellerophon.chimeras";
+ string accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "bellerophon.accnos";
+ bool hasAccnos = true;
+
+ chimera->getChimeras();
+
+ if (m->control_pressed) { delete chimera; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+
+ #ifdef USE_MPI
+ MPI_File outMPI;
+ MPI_File outMPIAccnos;
+
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+
+ char outFilename[1024];
+ strcpy(outFilename, accnosFileName.c_str());
- numSeqs = chimera->print(outMPI, outMPIAccnos);
-
- MPI_File_close(&outMPI);
- MPI_File_close(&outMPIAccnos);
+ char FileName[1024];
+ strcpy(FileName, outputFileName.c_str());
- #else
+ MPI_File_open(MPI_COMM_WORLD, FileName, outMode, MPI_INFO_NULL, &outMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
- ofstream out;
- openOutputFile(outputFileName, out);
-
- ofstream out2;
- openOutputFile(accnosFileName, out2);
-
- numSeqs = chimera->print(out, out2);
- out.close();
- out2.close();
-
- #endif
-
- if (m->control_pressed) { remove(accnosFileName.c_str()); remove(outputFileName.c_str()); delete chimera; return 0; }
+ numSeqs = chimera->print(outMPI, outMPIAccnos);
+
+ MPI_File_close(&outMPI);
+ MPI_File_close(&outMPIAccnos);
+
+ #else
- //delete accnos file if its blank
- if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
+ ofstream out;
+ openOutputFile(outputFileName, out);
+
+ ofstream out2;
+ openOutputFile(accnosFileName, out2);
+
+ numSeqs = chimera->print(out, out2);
+ out.close();
+ out2.close();
+
+ #endif
+
+ if (m->control_pressed) { remove(accnosFileName.c_str()); remove(outputFileName.c_str()); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete chimera; return 0; }
+
+ //delete accnos file if its blank
+ if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
+
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
+
+ outputNames.push_back(outputFileName);
+ if (hasAccnos) { outputNames.push_back(accnosFileName); }
+
+ delete chimera;
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine();
- if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); }
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
- m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-
- delete chimera;
return 0;
string fastafile, outputDir;
int processors, window, increment, numSeqs;
Chimera* chimera;
+ vector<string> outputNames;
+ vector<string> fastaFileNames;
};
/***********************************************************/
if (inputDir == "not found"){ inputDir = ""; }
else {
string path;
- it = parameters.find("fasta");
- //user has given a template file
- if(it != parameters.end()){
- path = hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["fasta"] = inputDir + it->second; }
- }
-
it = parameters.find("template");
//user has given a template file
if(it != parameters.end()){
}
}
-
//check for required parameters
- fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
- else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true; }
+ fastafile = validParameter.validFile(parameters, "fasta", false);
+ if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(fastafile, fastaFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+ if (inputDir != "") {
+ string path = hasPath(fastaFileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ #endif
+
+ ableToOpen = openInputFile(fastaFileNames[i], in);
+ in.close();
+
+ #ifdef USE_MPI
+ for (int j = 1; j < processors; j++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Status status;
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+
+ #endif
+
+ if (ableToOpen == 1) {
+ m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ fastaFileNames.erase(fastaFileNames.begin()+i);
+ i--;
+ }
+ }
+
+ //make sure there is at least one valid file left
+ if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+ }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
m->mothurOut("This command was created using the algorythms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n");
m->mothurOut("The chimera.ccode command parameters are fasta, template, filter, mask, processors, window and numwanted.\n");
m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+ m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
m->mothurOut("The chimera.ccode command should be in the following format: \n");
m->mothurOut("chimera.ccode(fasta=yourFastaFile, template=yourTemplate) \n");
- m->mothurOut("Example: chimera.seqs(fasta=AD.align, template=core_set_aligned.imputed.fasta) \n");
+ m->mothurOut("Example: chimera.ccode(fasta=AD.align, template=core_set_aligned.imputed.fasta) \n");
m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
}
catch(exception& e) {
if (abort == true) { return 0; }
- int start = time(NULL);
-
- //set user options
- if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
-
- chimera = new Ccode(fastafile, templatefile, filter, maskfile, window, numwanted, outputDir);
-
- //is your template aligned?
- if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; }
- templateSeqsLength = chimera->getLength();
-
- string outputFileName, accnosFileName;
- if (maskfile != "") {
- outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".ccode.chimeras";
- accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".ccode.accnos";
- }else {
- outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "ccode.chimeras";
- accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "ccode.accnos";
- }
-
- string mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
- bool hasAccnos = true;
-
- if (m->control_pressed) { delete chimera; return 0; }
+ for (int s = 0; s < fastaFileNames.size(); s++) {
+
+ m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
- #ifdef USE_MPI
-
- int pid, end, numSeqsPerProcessor;
- int tag = 2001;
- vector<long> MPIPos;
- MPIWroteAccnos = false;
-
- MPI_Status status;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
- MPI_File inMPI;
- MPI_File outMPI;
- MPI_File outMPIAccnos;
+ int start = time(NULL);
- int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
- int inMode=MPI_MODE_RDONLY;
+ //set user options
+ if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
- //char* outFilename = new char[outputFileName.length()];
- //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
-
- char outFilename[1024];
- strcpy(outFilename, outputFileName.c_str());
+ chimera = new Ccode(fastaFileNames[s], templatefile, filter, maskfile, window, numwanted, outputDir);
- //char* outAccnosFilename = new char[accnosFileName.length()];
- //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
+ //is your template aligned?
+ if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; }
+ templateSeqsLength = chimera->getLength();
- char outAccnosFilename[1024];
- strcpy(outAccnosFilename, accnosFileName.c_str());
+ string outputFileName, accnosFileName;
+ if (maskfile != "") {
+ outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.chimeras";
+ accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.accnos";
+ }else {
+ outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "ccode.chimeras";
+ accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "ccode.accnos";
+ }
- //char* inFileName = new char[fastafile.length()];
- //memcpy(inFileName, fastafile.c_str(), fastafile.length());
+ string mapInfo = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "mapinfo";
+ bool hasAccnos = true;
- char inFileName[1024];
- strcpy(inFileName, fastafile.c_str());
-
- MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
- MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
- MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+ if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
- //delete inFileName;
- //delete outFilename;
- //delete outAccnosFilename;
-
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
+ #ifdef USE_MPI
- if (pid == 0) { //you are the root process
- string outTemp = "For full window mapping info refer to " + mapInfo + "\n\n";
+ int pid, end, numSeqsPerProcessor;
+ int tag = 2001;
+ vector<long> MPIPos;
+ MPIWroteAccnos = false;
- //print header
- int length = outTemp.length();
- char* buf2 = new char[length];
- memcpy(buf2, outTemp.c_str(), length);
+ MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
- MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
- delete buf2;
-
- MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+ MPI_File inMPI;
+ MPI_File outMPI;
+ MPI_File outMPIAccnos;
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
- }
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+ int inMode=MPI_MODE_RDONLY;
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+ char outFilename[1024];
+ strcpy(outFilename, outputFileName.c_str());
+ char outAccnosFilename[1024];
+ strcpy(outAccnosFilename, accnosFileName.c_str());
+
+ char inFileName[1024];
+ strcpy(inFileName, fastaFileNames[s].c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+ MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+ if (pid == 0) { //you are the root process
+ string outTemp = "For full window mapping info refer to " + mapInfo + "\n\n";
+
+ //print header
+ int length = outTemp.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outTemp.c_str(), length);
+
+ MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
+ delete buf2;
+
+ MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; }
+ //align your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
- for (int i = 1; i < processors; i++) {
- bool tempResult;
- MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
- if (tempResult != 0) { MPIWroteAccnos = true; }
+ for (int i = 1; i < processors; i++) {
+ bool tempResult;
+ MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+ if (tempResult != 0) { MPIWroteAccnos = true; }
+ }
+ }else{ //you are a child process
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ MPIPos.resize(numSeqs+1);
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+
+
+ //align your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
- }else{ //you are a child process
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- MPIPos.resize(numSeqs+1);
- MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+ //close files
+ MPI_File_close(&inMPI);
+ MPI_File_close(&outMPI);
+ MPI_File_close(&outMPIAccnos);
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
-
- MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
- }
-
- //close files
- MPI_File_close(&inMPI);
- MPI_File_close(&outMPI);
- MPI_File_close(&outMPIAccnos);
+ //delete accnos file if blank
+ if (pid == 0) {
+ if (!MPIWroteAccnos) {
+ hasAccnos = false;
+ remove(accnosFileName.c_str());
+ }
+ }
+
+ #else
+ ofstream outHeader;
+ string tempHeader = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + "ccode.chimeras.tempHeader";
+ openOutputFile(tempHeader, outHeader);
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ outHeader << "For full window mapping info refer to " << mapInfo << endl << endl;
+
+ outHeader.close();
- //delete accnos file if blank
- if (pid == 0) {
- if (!MPIWroteAccnos) {
- //MPI_Info info;
- //MPI_File_delete(outAccnosFilename, info);
- hasAccnos = false;
- remove(accnosFileName.c_str());
+ //break up file
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+ getNumSeqs(inFASTA, numSeqs);
+ inFASTA.close();
+
+ lines.push_back(new linePair(0, numSeqs));
+
+ driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+
+ if (m->control_pressed) {
+ remove(outputFileName.c_str());
+ remove(tempHeader.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
+
+ //delete accnos file if its blank
+ if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
+
+ }else{
+ vector<int> positions;
+ processIDS.resize(0);
+
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+
+ string input;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+ if (input.length() != 0) {
+ if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+ }
+ }
+ inFASTA.close();
+
+ numSeqs = positions.size();
+
+ int numSeqsPerProcessor = numSeqs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ long int startPos = positions[ i * numSeqsPerProcessor ];
+ if(i == processors - 1){
+ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ }
+
+
+ createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
+
+ rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+
+ //append output files
+ for(int i=1;i<processors;i++){
+ appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+ remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+ }
+
+ vector<string> nonBlankAccnosFiles;
+ //delete blank accnos files generated with multiple processes
+ for(int i=0;i<processors;i++){
+ if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+ nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+ }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
+ }
+
+ //append accnos files
+ if (nonBlankAccnosFiles.size() != 0) {
+ rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+
+ for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+ appendFiles(nonBlankAccnosFiles[h], accnosFileName);
+ remove(nonBlankAccnosFiles[h].c_str());
+ }
+ }else{ hasAccnos = false; }
+
+ if (m->control_pressed) {
+ remove(outputFileName.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
+
}
- }
-
- #else
- ofstream outHeader;
- string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + "ccode.chimeras.tempHeader";
- openOutputFile(tempHeader, outHeader);
-
- outHeader << "For full window mapping info refer to " << mapInfo << endl << endl;
- outHeader.close();
-
- //break up file
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
+ #else
ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
+ openInputFile(fastaFileNames[s], inFASTA);
getNumSeqs(inFASTA, numSeqs);
inFASTA.close();
-
lines.push_back(new linePair(0, numSeqs));
- driver(lines[0], outputFileName, fastafile, accnosFileName);
+ driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
+ remove(outputFileName.c_str());
+ remove(tempHeader.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
}
//delete accnos file if its blank
if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
-
- }else{
- vector<int> positions;
- processIDS.resize(0);
-
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
-
- createProcesses(outputFileName, fastafile, accnosFileName);
-
- rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
-
- //append output files
- for(int i=1;i<processors;i++){
- appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
- remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
- }
-
- vector<string> nonBlankAccnosFiles;
- //delete blank accnos files generated with multiple processes
- for(int i=0;i<processors;i++){
- if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
- nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
- }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
- }
-
- //append accnos files
- if (nonBlankAccnosFiles.size() != 0) {
- rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
-
- for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
- appendFiles(nonBlankAccnosFiles[h], accnosFileName);
- remove(nonBlankAccnosFiles[h].c_str());
- }
- }else{ hasAccnos = false; }
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
-
- }
-
- #else
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
+ #endif
+
+ appendFiles(outputFileName, tempHeader);
+
+ remove(outputFileName.c_str());
+ rename(tempHeader.c_str(), outputFileName.c_str());
+ #endif
+
+ delete chimera;
- driver(lines[0], outputFileName, fastafile, accnosFileName);
+ outputNames.push_back(outputFileName);
+ outputNames.push_back(mapInfo);
+ if (hasAccnos) { outputNames.push_back(accnosFileName); }
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- //delete accnos file if its blank
- if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
- #endif
-
- //m->mothurOut("Output File Names: ");
- //if ((filter) && (method == "bellerophon")) { m->mothurOut(
- //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
- // else { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
-
- appendFiles(outputFileName, tempHeader);
-
- remove(outputFileName.c_str());
- rename(tempHeader.c_str(), outputFileName.c_str());
- #endif
-
- delete chimera;
-
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine();
- if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); }
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
-
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
-
- m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
return 0;
string fastafile, templatefile, outputDir, maskfile;
int processors, window, numwanted, numSeqs, templateSeqsLength;
Chimera* chimera;
+ vector<string> fastaFileNames;
+ vector<string> outputNames;
};
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
- string path;
- it = parameters.find("fasta");
- //user has given a template file
- if(it != parameters.end()){
- path = hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["fasta"] = inputDir + it->second; }
- }
-
it = parameters.find("template");
//user has given a template file
if(it != parameters.end()){
- path = hasPath(it->second);
+ string path = hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["template"] = inputDir + it->second; }
}
-
- it = parameters.find("name");
- //user has given a template file
- if(it != parameters.end()){
- path = hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["name"] = inputDir + it->second; }
- }
}
-
//check for required parameters
- fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
- else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; }
+ fastafile = validParameter.validFile(parameters, "fasta", false);
+ if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(fastafile, fastaFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+ if (inputDir != "") {
+ string path = hasPath(fastaFileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ #endif
+
+ ableToOpen = openInputFile(fastaFileNames[i], in);
+ in.close();
+
+ #ifdef USE_MPI
+ for (int j = 1; j < processors; j++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Status status;
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+
+ #endif
+
+ if (ableToOpen == 1) {
+ m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ fastaFileNames.erase(fastaFileNames.begin()+i);
+ i--;
+ }
+ }
+
+ //make sure there is at least one valid file left
+ if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+ }
//if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
- outputDir = "";
- outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it
- }
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
templatefile = validParameter.validFile(parameters, "template", true);
if (templatefile == "not open") { abort = true; }
else if (templatefile == "not found") { templatefile = ""; m->mothurOut("template is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; }
- namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
- else if (namefile == "not found") { namefile = ""; }
+ namefile = validParameter.validFile(parameters, "name", false);
+ if (namefile == "not found") { namefile = ""; }
+ else {
+ splitAtDash(namefile, nameFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < nameFileNames.size(); i++) {
+ if (inputDir != "") {
+ string path = hasPath(nameFileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ #endif
+
+ ableToOpen = openInputFile(nameFileNames[i], in);
+ in.close();
+
+ #ifdef USE_MPI
+ for (int j = 1; j < processors; j++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Status status;
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+
+ #endif
+
+ if (ableToOpen == 1) {
+ m->mothurOut(nameFileNames[i] + " will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ nameFileNames.erase(nameFileNames.begin()+i);
+ i--;
+ }
+
+ }
+
+ //make sure there is at least one valid file left
+ if (nameFileNames.size() != 0) {
+ if (nameFileNames.size() != fastaFileNames.size()) {
+ m->mothurOut("Different number of valid name files and fasta files, aborting command."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
+ }
string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
convert(temp, processors);
temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; }
svg = isTrue(temp);
+ if (nameFileNames.size() != 0) { svg = true; }
temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "10"; }
convert(temp, increment);
m->mothurOut("This command was created using the algorythms described in CHIMERA_CHECK version 2.7 written by Niels Larsen. \n");
m->mothurOut("The chimera.check command parameters are fasta, template, processors, ksize, increment, svg and name.\n");
m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+ m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
#ifdef USE_MPI
m->mothurOut("The ksize parameter allows you to input kmersize, default is 7. \n");
m->mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n");
m->mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
+ m->mothurOut("You may enter multiple name files by separating their names with dashes. ie. fasta=abrecovery.svg.names-amzon.svg.names \n");
m->mothurOut("The chimera.check command should be in the following format: \n");
m->mothurOut("chimera.check(fasta=yourFastaFile, template=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n");
m->mothurOut("Example: chimera.check(fasta=AD.fasta, template=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n");
if (abort == true) { return 0; }
- int start = time(NULL);
-
- chimera = new ChimeraCheckRDP(fastafile, templatefile, namefile, svg, increment, ksize, outputDir);
-
- if (m->control_pressed) { delete chimera; return 0; }
-
- string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "chimeracheck.chimeras";
-
- #ifdef USE_MPI
-
- int pid, end, numSeqsPerProcessor;
- int tag = 2001;
- vector<long> MPIPos;
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+
+ m->mothurOut("Checking sequences from " + fastaFileNames[i] + " ..." ); m->mothurOutEndLine();
- MPI_Status status;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
- MPI_File inMPI;
- MPI_File outMPI;
-
- int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
- int inMode=MPI_MODE_RDONLY;
+ int start = time(NULL);
- //char* outFilename = new char[outputFileName.length()];
- //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
+ string thisNameFile = "";
+ if (nameFileNames.size() != 0) { thisNameFile = nameFileNames[i]; }
- char outFilename[1024];
- strcpy(outFilename, outputFileName.c_str());
+ chimera = new ChimeraCheckRDP(fastaFileNames[i], templatefile, thisNameFile, svg, increment, ksize, outputDir);
- //char* inFileName = new char[fastafile.length()];
- //memcpy(inFileName, fastafile.c_str(), fastafile.length());
+ if (m->control_pressed) { delete chimera; return 0; }
- char inFileName[1024];
- strcpy(inFileName, fastafile.c_str());
-
- MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
- MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+ string outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "chimeracheck.chimeras";
+ outputNames.push_back(outputFileName);
- //delete outFilename;
- //delete inFileName;
+ #ifdef USE_MPI
+
+ int pid, end, numSeqsPerProcessor;
+ int tag = 2001;
+ vector<long> MPIPos;
+
+ MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); delete chimera; return 0; }
+ MPI_File inMPI;
+ MPI_File outMPI;
+
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+ int inMode=MPI_MODE_RDONLY;
+
+ char outFilename[1024];
+ strcpy(outFilename, outputFileName.c_str());
- if (pid == 0) { //you are the root process
- MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
-
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
- }
-
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+ char inFileName[1024];
+ strcpy(inFileName, fastaFileNames[i].c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
-
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); remove(outputFileName.c_str()); delete chimera; return 0; }
+ if (pid == 0) { //you are the root process
+ MPIPos = setFilePosFasta(fastaFileNames[i], numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int j = 1; j < processors; j++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, j, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, j, tag, MPI_COMM_WORLD);
+ }
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+
- //wait on chidren
- for(int i = 1; i < processors; i++) {
+ //align your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ //wait on chidren
+ for(int j = 1; j < processors; j++) {
+ char buf[4];
+ MPI_Recv(buf, 4, MPI_CHAR, j, tag, MPI_COMM_WORLD, &status);
+ }
+ }else{ //you are a child process
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ MPIPos.resize(numSeqs+1);
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+
+ //align your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ //tell parent you are done.
char buf[4];
- MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
+ strcpy(buf, "done");
+ MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
}
- }else{ //you are a child process
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- MPIPos.resize(numSeqs+1);
- MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
-
-
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); delete chimera; return 0; }
-
- //tell parent you are done.
- char buf[4];
- strcpy(buf, "done");
- MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
- }
+ //close files
+ MPI_File_close(&inMPI);
+ MPI_File_close(&outMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ #else
- //close files
- MPI_File_close(&inMPI);
- MPI_File_close(&outMPI);
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
- #else
-
- //break up file
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
+ //break up file
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[i], inFASTA);
+ getNumSeqs(inFASTA, numSeqs);
+ inFASTA.close();
+
+ lines.push_back(new linePair(0, numSeqs));
+
+ driver(lines[0], outputFileName, fastaFileNames[i]);
+
+ if (m->control_pressed) {
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
+
+ }else{
+ vector<int> positions;
+ processIDS.resize(0);
+
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[i], inFASTA);
+
+ string input;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+ if (input.length() != 0) {
+ if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+ }
+ }
+ inFASTA.close();
+
+ numSeqs = positions.size();
+
+ int numSeqsPerProcessor = numSeqs / processors;
+
+ for (int j = 0; j < processors; j++) {
+ long int startPos = positions[ j * numSeqsPerProcessor ];
+ if(j == processors - 1){
+ numSeqsPerProcessor = numSeqs - j * numSeqsPerProcessor;
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ }
+
+
+ createProcesses(outputFileName, fastaFileNames[i]);
+
+ rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+
+ //append output files
+ for(int j=1;j<processors;j++){
+ appendFiles((outputFileName + toString(processIDS[j]) + ".temp"), outputFileName);
+ remove((outputFileName + toString(processIDS[j]) + ".temp").c_str());
+ }
+
+ if (m->control_pressed) {
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
+ }
+
+ #else
ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
+ openInputFile(fastaFileNames[i], inFASTA);
getNumSeqs(inFASTA, numSeqs);
inFASTA.close();
-
lines.push_back(new linePair(0, numSeqs));
- driver(lines[0], outputFileName, fastafile);
+ driver(lines[0], outputFileName, fastaFileNames[i]);
if (m->control_pressed) {
- remove(outputFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
-
- }else{
- vector<int> positions;
- processIDS.resize(0);
-
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
+ for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear();
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ delete chimera;
+ return 0;
}
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
-
- createProcesses(outputFileName, fastafile);
+ #endif
+ #endif
+ delete chimera;
+ for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear();
- rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
-
- //append output files
- for(int i=1;i<processors;i++){
- appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
- remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
- }
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
- }
+ m->mothurOutEndLine(); m->mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine();
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
- #else
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
-
- driver(lines[0], outputFileName, fastafile);
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
- #endif
- #endif
- delete chimera;
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
-
- m->mothurOutEndLine(); m->mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine();
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
- m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-
+
return 0;
}
string fastafile, templatefile, namefile, outputDir;
int processors, increment, ksize, numSeqs, templateSeqsLength;
Chimera* chimera;
-
-
+ vector<string> fastaFileNames;
+ vector<string> nameFileNames;
+ vector<string> outputNames;
+
};
/***********************************************************/
}
//if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
string path;
- it = parameters.find("fasta");
- //user has given a template file
- if(it != parameters.end()){
- path = hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["fasta"] = inputDir + it->second; }
- }
-
it = parameters.find("template");
//user has given a template file
if(it != parameters.end()){
//check for required parameters
- fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
- else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
- outputDir = "";
- outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it
- }
+ fastafile = validParameter.validFile(parameters, "fasta", false);
+ if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(fastafile, fastaFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+ if (inputDir != "") {
+ string path = hasPath(fastaFileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ #endif
- templatefile = validParameter.validFile(parameters, "template", true);
- if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
+ ableToOpen = openInputFile(fastaFileNames[i], in);
+ in.close();
+
+ #ifdef USE_MPI
+ for (int j = 1; j < processors; j++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Status status;
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+
+ #endif
+
+ if (ableToOpen == 1) {
+ m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ fastaFileNames.erase(fastaFileNames.begin()+i);
+ i--;
+ }
+ }
+
+ //make sure there is at least one valid file left
+ if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+ }
- consfile = validParameter.validFile(parameters, "conservation", true);
- if (consfile == "not open") { abort = true; }
- else if (consfile == "not found") { consfile = ""; }
+ string temp;
+ temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
+ filter = isTrue(temp);
- quanfile = validParameter.validFile(parameters, "quantile", true);
- if (quanfile == "not open") { abort = true; }
- else if (quanfile == "not found") { quanfile = ""; }
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
+ convert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
+ convert(temp, window);
+
+ temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
+ convert(temp, increment);
maskfile = validParameter.validFile(parameters, "mask", false);
if (maskfile == "not found") { maskfile = ""; }
if (ableToOpen == 1) { abort = true; }
in.close();
}
-
- string temp;
- temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
- filter = isTrue(temp);
+
- temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
- convert(temp, processors);
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = "";
+ outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it
+ }
+
+ templatefile = validParameter.validFile(parameters, "template", true);
+ if (templatefile == "not open") { abort = true; }
+ else if (templatefile == "not found") { templatefile = ""; m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
- temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
- convert(temp, window);
+ consfile = validParameter.validFile(parameters, "conservation", true);
+ if (consfile == "not open") { abort = true; }
+ else if (consfile == "not found") {
+ consfile = "";
+ //check for consfile
+ string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq";
+ ifstream FileTest(tempConsFile.c_str());
+ if(FileTest){ m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close(); }
+ }
- temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
- convert(temp, increment);
+ quanfile = validParameter.validFile(parameters, "quantile", true);
+ if (quanfile == "not open") { abort = true; }
+ else if (quanfile == "not found") { quanfile = ""; }
}
}
catch(exception& e) {
m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+ m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n");
m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
m->mothurOut("The chimera.pintail command should be in the following format: \n");
- m->mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
- m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
+ m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
+ m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
}
catch(exception& e) {
if (abort == true) { return 0; }
- int start = time(NULL);
-
- //set user options
- if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
-
- chimera = new Pintail(fastafile, templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
-
- string outputFileName, accnosFileName;
- if (maskfile != "") {
- outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.chimeras";
- accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.accnos";
- }else {
- outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "pintail.chimeras";
- accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "pintail.accnos";
- }
- bool hasAccnos = true;
-
- if (m->control_pressed) { delete chimera; return 0; }
-
- if (chimera->getUnaligned()) {
- m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
- delete chimera;
- return 0;
- }
- templateSeqsLength = chimera->getLength();
-
- #ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
- int tag = 2001;
- vector<long> MPIPos;
- MPIWroteAccnos = false;
-
- MPI_Status status;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
+ for (int s = 0; s < fastaFileNames.size(); s++) {
+
+ m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
- MPI_File inMPI;
- MPI_File outMPI;
- MPI_File outMPIAccnos;
-
- int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
- int inMode=MPI_MODE_RDONLY;
-
- //char* outFilename = new char[outputFileName.length()];
- //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
+ int start = time(NULL);
+
+ //set user options
+ if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
+
+ //check for quantile to save the time
+ string tempQuan = "";
+ if ((!filter) && (maskfile == "")) {
+ tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.quan";
+ }else if ((!filter) && (maskfile != "")) {
+ tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.masked.quan";
+ }else if ((filter) && (maskfile != "")) {
+ tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "masked.quan";
+ }else if ((filter) && (maskfile == "")) {
+ tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "quan";
+ }
- char outFilename[1024];
- strcpy(outFilename, outputFileName.c_str());
+ ifstream FileTest(tempQuan.c_str());
+ if(FileTest){ m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close(); }
- //char* outAccnosFilename = new char[accnosFileName.length()];
- //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
+ chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
- char outAccnosFilename[1024];
- strcpy(outAccnosFilename, accnosFileName.c_str());
-
- //char* inFileName = new char[fastafile.length()];
- //memcpy(inFileName, fastafile.c_str(), fastafile.length());
+ string outputFileName, accnosFileName;
+ if (maskfile != "") {
+ outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.chimeras";
+ accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.accnos";
+ }else {
+ outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
+ accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "pintail.accnos";
+ }
+ bool hasAccnos = true;
- char inFileName[1024];
- strcpy(inFileName, fastafile.c_str());
-
- MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
- MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
- MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+ if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
- //delete inFileName;
- //delete outFilename;
- //delete outAccnosFilename;
-
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
+ if (chimera->getUnaligned()) {
+ m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
+ delete chimera;
+ return 0;
+ }
+ templateSeqsLength = chimera->getLength();
+
+ #ifdef USE_MPI
+ int pid, end, numSeqsPerProcessor;
+ int tag = 2001;
+ vector<long> MPIPos;
+ MPIWroteAccnos = false;
+
+ MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
- if (pid == 0) { //you are the root process
-
- MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+ MPI_File inMPI;
+ MPI_File outMPI;
+ MPI_File outMPIAccnos;
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
- }
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+ int inMode=MPI_MODE_RDONLY;
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+ char outFilename[1024];
+ strcpy(outFilename, outputFileName.c_str());
-
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; }
-
- for (int i = 1; i < processors; i++) {
- bool tempResult;
- MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
- if (tempResult != 0) { MPIWroteAccnos = true; }
- }
- }else{ //you are a child process
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- MPIPos.resize(numSeqs+1);
- MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ char outAccnosFilename[1024];
+ strcpy(outAccnosFilename, accnosFileName.c_str());
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+ char inFileName[1024];
+ strcpy(inFileName, fastaFileNames[s].c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+ MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ if (pid == 0) { //you are the root process
+
+ MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+ //do your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ for (int i = 1; i < processors; i++) {
+ bool tempResult;
+ MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+ if (tempResult != 0) { MPIWroteAccnos = true; }
+ }
+ }else{ //you are a child process
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ MPIPos.resize(numSeqs+1);
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+
+ //do your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+ }
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
+ //close files
+ MPI_File_close(&inMPI);
+ MPI_File_close(&outMPI);
+ MPI_File_close(&outMPIAccnos);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
+ //delete accnos file if blank
+ if (pid == 0) {
+ if (!MPIWroteAccnos) {
+ hasAccnos = false;
+ remove(accnosFileName.c_str());
+ }
+ }
- MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
- }
-
- //close files
- MPI_File_close(&inMPI);
- MPI_File_close(&outMPI);
- MPI_File_close(&outMPIAccnos);
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-
- //delete accnos file if blank
- if (pid == 0) {
- if (!MPIWroteAccnos) {
- //MPI_Info info;
- //MPI_File_delete(outAccnosFilename, info);
- hasAccnos = false;
- remove(accnosFileName.c_str());
+ #else
+
+ //break up file
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+ getNumSeqs(inFASTA, numSeqs);
+ inFASTA.close();
+
+ lines.push_back(new linePair(0, numSeqs));
+
+ driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+
+ if (m->control_pressed) {
+ remove(outputFileName.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
+
+ //delete accnos file if its blank
+ if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
+
+ }else{
+ vector<int> positions;
+ processIDS.resize(0);
+
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+
+ string input;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+ if (input.length() != 0) {
+ if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+ }
+ }
+ inFASTA.close();
+
+ numSeqs = positions.size();
+
+ int numSeqsPerProcessor = numSeqs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ long int startPos = positions[ i * numSeqsPerProcessor ];
+ if(i == processors - 1){
+ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ }
+
+ createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
+
+ rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+
+ //append output files
+ for(int i=1;i<processors;i++){
+ appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+ remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+ }
+
+ vector<string> nonBlankAccnosFiles;
+ //delete blank accnos files generated with multiple processes
+ for(int i=0;i<processors;i++){
+ if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+ nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+ }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
+ }
+
+ //append accnos files
+ if (nonBlankAccnosFiles.size() != 0) {
+ rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+
+ for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+ appendFiles(nonBlankAccnosFiles[h], accnosFileName);
+ remove(nonBlankAccnosFiles[h].c_str());
+ }
+ }else{ hasAccnos = false; }
+
+ if (m->control_pressed) {
+ remove(outputFileName.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
}
- }
- #else
-
- //break up file
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
+ #else
ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
+ openInputFile(fastaFileNames[s], inFASTA);
getNumSeqs(inFASTA, numSeqs);
inFASTA.close();
-
lines.push_back(new linePair(0, numSeqs));
- driver(lines[0], outputFileName, fastafile, accnosFileName);
+ driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
+ remove(outputFileName.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
}
//delete accnos file if its blank
if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
-
- }else{
- vector<int> positions;
- processIDS.resize(0);
-
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
-
- createProcesses(outputFileName, fastafile, accnosFileName);
-
- rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
-
- //append output files
- for(int i=1;i<processors;i++){
- appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
- remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
- }
-
- vector<string> nonBlankAccnosFiles;
- //delete blank accnos files generated with multiple processes
- for(int i=0;i<processors;i++){
- if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
- nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
- }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
- }
-
- //append accnos files
- if (nonBlankAccnosFiles.size() != 0) {
- rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
-
- for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
- appendFiles(nonBlankAccnosFiles[h], accnosFileName);
- remove(nonBlankAccnosFiles[h].c_str());
- }
- }else{ hasAccnos = false; }
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
- }
-
- #else
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
+ #endif
- driver(lines[0], outputFileName, fastafile, accnosFileName);
+ #endif
+
+ delete chimera;
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ outputNames.push_back(outputFileName);
+ if (hasAccnos) { outputNames.push_back(accnosFileName); }
- //delete accnos file if its blank
- if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
- #endif
-
- #endif
-
- delete chimera;
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ m->mothurOutEndLine();
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine();
- if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); }
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
- m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-
+
return 0;
}
#endif
bool abort, filter, MPIWroteAccnos;
- string fastafile, templatefile, consfile, quanfile, maskfile, outputDir;
+ string fastafile, templatefile, consfile, quanfile, maskfile, outputDir, inputDir;
int processors, window, increment, numSeqs, templateSeqsLength;
Chimera* chimera;
+ vector<string> outputNames;
+ vector<string> fastaFileNames;
};
if (inputDir == "not found"){ inputDir = ""; }
else {
string path;
- it = parameters.find("fasta");
- //user has given a template file
- if(it != parameters.end()){
- path = hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["fasta"] = inputDir + it->second; }
- }
-
it = parameters.find("template");
//user has given a template file
if(it != parameters.end()){
//check for required parameters
- fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
- else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true; }
+ fastafile = validParameter.validFile(parameters, "fasta", false);
+ if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(fastafile, fastaFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+ if (inputDir != "") {
+ string path = hasPath(fastaFileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ #endif
+
+ ableToOpen = openInputFile(fastaFileNames[i], in);
+ in.close();
+
+ #ifdef USE_MPI
+ for (int j = 1; j < processors; j++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Status status;
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ }
+
+ #endif
+
+ if (ableToOpen == 1) {
+ m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ fastaFileNames.erase(fastaFileNames.begin()+i);
+ i--;
+ }
+ }
+
+ //make sure there is at least one valid file left
+ if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+ }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
m->mothurOut("This command was modeled after the chimeraSlayer written by the Broad Institute.\n");
m->mothurOut("The chimera.slayer command parameters are fasta, template, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+ m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
#ifdef USE_MPI
m->mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n");
m->mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
m->mothurOut("The search parameter allows you to specify search method for finding the closest parent. Choices are distance, blast, and kmer, default distance. \n");
- //m->mothurOut("The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default false. Found to make results worse. \n");
+ m->mothurOut("The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default false. \n");
m->mothurOut("The chimera.slayer command should be in the following format: \n");
m->mothurOut("chimera.slayer(fasta=yourFastaFile, template=yourTemplate, search=yourSearch) \n");
m->mothurOut("Example: chimera.slayer(fasta=AD.align, template=core_set_aligned.imputed.fasta, search=kmer) \n");
if (abort == true) { return 0; }
- int start = time(NULL);
-
- chimera = new ChimeraSlayer(fastafile, templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
-
- string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.chimeras";
- string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.accnos";
- bool hasAccnos = true;
-
- if (m->control_pressed) { delete chimera; return 0; }
-
- if (chimera->getUnaligned()) {
- m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
- delete chimera;
- return 0;
- }
- templateSeqsLength = chimera->getLength();
+ for (int s = 0; s < fastaFileNames.size(); s++) {
+
+ m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
- #ifdef USE_MPI
- int pid, end, numSeqsPerProcessor;
- int tag = 2001;
- vector<long> MPIPos;
- MPIWroteAccnos = false;
-
- MPI_Status status;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
- MPI_File inMPI;
- MPI_File outMPI;
- MPI_File outMPIAccnos;
+ int start = time(NULL);
- int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
- int inMode=MPI_MODE_RDONLY;
+ chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
+
+ string outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "slayer.chimeras";
+ string accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "slayer.accnos";
+ bool hasAccnos = true;
- //char* outFilename = new char[outputFileName.length()];
- //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
+ if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
- char outFilename[1024];
- strcpy(outFilename, outputFileName.c_str());
-
- //char* outAccnosFilename = new char[accnosFileName.length()];
- //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
+ if (chimera->getUnaligned()) {
+ m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
+ delete chimera;
+ return 0;
+ }
+ templateSeqsLength = chimera->getLength();
- char outAccnosFilename[1024];
- strcpy(outAccnosFilename, accnosFileName.c_str());
+ #ifdef USE_MPI
+ int pid, end, numSeqsPerProcessor;
+ int tag = 2001;
+ vector<long> MPIPos;
+ MPIWroteAccnos = false;
+
+ MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
- //char* inFileName = new char[fastafile.length()];
- //memcpy(inFileName, fastafile.c_str(), fastafile.length());
-
- char inFileName[1024];
- strcpy(inFileName, fastafile.c_str());
+ MPI_File inMPI;
+ MPI_File outMPI;
+ MPI_File outMPIAccnos;
+
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+ int inMode=MPI_MODE_RDONLY;
+
+ char outFilename[1024];
+ strcpy(outFilename, outputFileName.c_str());
+
+ char outAccnosFilename[1024];
+ strcpy(outAccnosFilename, accnosFileName.c_str());
+
+ char inFileName[1024];
+ strcpy(inFileName, fastaFileNames[s].c_str());
- MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
- MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
- MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
-
- //delete inFileName;
- //delete outFilename;
- //delete outAccnosFilename;
+ MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+ MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ if (pid == 0) { //you are the root process
+ m->mothurOutEndLine();
+ m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
+ m->mothurOutEndLine();
- if (pid == 0) { //you are the root process
- m->mothurOutEndLine();
- m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
- m->mothurOutEndLine();
-
- string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
-
- //print header
- int length = outTemp.length();
- char* buf2 = new char[length];
- memcpy(buf2, outTemp.c_str(), length);
+ string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
+
+ //print header
+ int length = outTemp.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outTemp.c_str(), length);
- MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
- delete buf2;
+ MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
+ delete buf2;
- MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+ MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ //do your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; }
+
+ for (int i = 1; i < processors; i++) {
+ bool tempResult;
+ MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+ if (tempResult != 0) { MPIWroteAccnos = true; }
+ }
+ }else{ //you are a child process
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ MPIPos.resize(numSeqs+1);
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+
+ //do your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+
+ MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
+ //close files
+ MPI_File_close(&inMPI);
+ MPI_File_close(&outMPI);
+ MPI_File_close(&outMPIAccnos);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ //delete accnos file if blank
+ if (pid == 0) {
+ if (!MPIWroteAccnos) {
+ hasAccnos = false;
+ remove(accnosFileName.c_str());
+ }
+ }
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+ #else
+ ofstream outHeader;
+ string tempHeader = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "slayer.chimeras.tempHeader";
+ openOutputFile(tempHeader, outHeader);
+
+ chimera->printHeader(outHeader);
+ outHeader.close();
+
+ //break up file
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+ getNumSeqs(inFASTA, numSeqs);
+ inFASTA.close();
+
+ lines.push_back(new linePair(0, numSeqs));
+
+ driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+
+ if (m->control_pressed) {
+ remove(outputFileName.c_str());
+ remove(tempHeader.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
+
+ //delete accnos file if its blank
+ if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
+
+ }else{
+ vector<int> positions;
+ processIDS.resize(0);
+
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+
+ string input;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+ if (input.length() != 0) {
+ if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+ }
+ }
+ inFASTA.close();
+
+ numSeqs = positions.size();
+
+ int numSeqsPerProcessor = numSeqs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ long int startPos = positions[ i * numSeqsPerProcessor ];
+ if(i == processors - 1){
+ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ }
+
+ createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; }
+ rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+
+ //append output files
+ for(int i=1;i<processors;i++){
+ appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+ remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+ }
+
+ vector<string> nonBlankAccnosFiles;
+ //delete blank accnos files generated with multiple processes
+ for(int i=0;i<processors;i++){
+ if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+ nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+ }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
+ }
+
+ //append accnos files
+ if (nonBlankAccnosFiles.size() != 0) {
+ rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+
+ for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+ appendFiles(nonBlankAccnosFiles[h], accnosFileName);
+ remove(nonBlankAccnosFiles[h].c_str());
+ }
+ }else{ hasAccnos = false; }
+
+ if (m->control_pressed) {
+ remove(outputFileName.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
+ }
- for (int i = 1; i < processors; i++) {
- bool tempResult;
- MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
- if (tempResult != 0) { MPIWroteAccnos = true; }
}
- }else{ //you are a child process
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- MPIPos.resize(numSeqs+1);
- MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-
- //figure out how many sequences you have to align
- numSeqsPerProcessor = numSeqs / processors;
- int startIndex = pid * numSeqsPerProcessor;
- if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
-
- //align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; }
- MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
- }
-
- //close files
- MPI_File_close(&inMPI);
- MPI_File_close(&outMPI);
- MPI_File_close(&outMPIAccnos);
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-
- //delete accnos file if blank
- if (pid == 0) {
- if (!MPIWroteAccnos) {
- //MPI_Info info;
- //MPI_File_delete(outAccnosFilename, info);
- hasAccnos = false;
- remove(accnosFileName.c_str());
- }
- }
-
- #else
- ofstream outHeader;
- string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.chimeras.tempHeader";
- openOutputFile(tempHeader, outHeader);
-
- chimera->printHeader(outHeader);
- outHeader.close();
-
- //break up file
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
+ #else
ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
+ openInputFile(fastaFileNames[s], inFASTA);
getNumSeqs(inFASTA, numSeqs);
inFASTA.close();
-
lines.push_back(new linePair(0, numSeqs));
- driver(lines[0], outputFileName, fastafile, accnosFileName);
+ driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
+ remove(outputFileName.c_str());
+ remove(tempHeader.c_str());
+ remove(accnosFileName.c_str());
+ for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ delete chimera;
+ return 0;
}
//delete accnos file if its blank
if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
-
- }else{
- vector<int> positions;
- processIDS.resize(0);
-
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
- createProcesses(outputFileName, fastafile, accnosFileName);
+ #endif
- rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
-
- //append output files
- for(int i=1;i<processors;i++){
- appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
- remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
- }
-
- vector<string> nonBlankAccnosFiles;
- //delete blank accnos files generated with multiple processes
- for(int i=0;i<processors;i++){
- if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
- nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
- }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
- }
-
- //append accnos files
- if (nonBlankAccnosFiles.size() != 0) {
- rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
-
- for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
- appendFiles(nonBlankAccnosFiles[h], accnosFileName);
- remove(nonBlankAccnosFiles[h].c_str());
- }
- }else{ hasAccnos = false; }
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
-
- }
-
- #else
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
-
- driver(lines[0], outputFileName, fastafile, accnosFileName);
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ appendFiles(outputFileName, tempHeader);
+
+ remove(outputFileName.c_str());
+ rename(tempHeader.c_str(), outputFileName.c_str());
- //delete accnos file if its blank
- if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
#endif
-
- appendFiles(outputFileName, tempHeader);
-
- remove(outputFileName.c_str());
- rename(tempHeader.c_str(), outputFileName.c_str());
-
- #endif
- delete chimera;
+ delete chimera;
+
+
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+
+ outputNames.push_back(outputFileName);
+ if (hasAccnos) { outputNames.push_back(accnosFileName); }
+
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine();
- if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); }
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
-
- m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-
return 0;
}
float divR;
Chimera* chimera;
+ vector<string> outputNames;
+ vector<string> fastaFileNames;
};