From: westcott Date: Mon, 21 Jun 2010 16:08:42 +0000 (+0000) Subject: modified chimera commands to process multiple fasta files and added checks to pintail... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=52ea8f3a52c05331b9bd7519ae0b518bda233d05 modified chimera commands to process multiple fasta files and added checks to pintail to look for consfile and quantile files to save time. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 8a540e6..1609df9 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -661,12 +661,12 @@ A7D216061199C47F00F13F13 /* catchallcommand.h */, A7D216071199C47F00F13F13 /* catchallcommand.cpp */, A7DA2017113FECD400BF472F /* chimeraseqscommand.cpp */, - A78434881162224F00100BE0 /* chimeraccodecommand.h */, A7DA2018113FECD400BF472F /* chimeraseqscommand.h */, A7825502116519F70002E2DD /* chimerabellerophoncommand.h */, A7825503116519F70002E2DD /* chimerabellerophoncommand.cpp */, A747E79B1163442A00FB9042 /* chimeracheckcommand.h */, A747E79C1163442A00FB9042 /* chimeracheckcommand.cpp */, + A78434881162224F00100BE0 /* chimeraccodecommand.h */, A78434891162224F00100BE0 /* chimeraccodecommand.cpp */, A78254461164D7790002E2DD /* chimerapintailcommand.h */, A78254471164D7790002E2DD /* chimerapintailcommand.cpp */, diff --git a/aligncommand.cpp b/aligncommand.cpp index 1f93068..16006f7 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -267,26 +267,14 @@ int AlignCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - //char* outAlignFilename = new char[alignFileName.length()]; - //memcpy(outAlignFilename, alignFileName.c_str(), alignFileName.length()); - char outAlignFilename[1024]; strcpy(outAlignFilename, alignFileName.c_str()); - - //char* outReportFilename = new char[reportFileName.length()]; - //memcpy(outReportFilename, reportFileName.c_str(), reportFileName.length()); char outReportFilename[1024]; strcpy(outReportFilename, reportFileName.c_str()); - - //char* outAccnosFilename = new char[accnosFileName.length()]; - //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); char outAccnosFilename[1024]; strcpy(outAccnosFilename, accnosFileName.c_str()); - - //char* inFileName = new char[candidateFileNames[s].length()]; - //memcpy(inFileName, candidateFileNames[s].c_str(), candidateFileNames[s].length()); char inFileName[1024]; strcpy(inFileName, candidateFileNames[s].c_str()); diff --git a/chimerabellerophoncommand.cpp b/chimerabellerophoncommand.cpp index 668956f..d7f60e7 100644 --- a/chimerabellerophoncommand.cpp +++ b/chimerabellerophoncommand.cpp @@ -38,22 +38,57 @@ ChimeraBellerophonCommand::ChimeraBellerophonCommand(string option) { //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } - else { - string path; - it = parameters.find("fasta"); - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["fasta"] = inputDir + it->second; } + + fastafile = validParameter.validFile(parameters, "fasta", false); + if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.bellerophon command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(fastafile, fastaFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < fastaFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(fastaFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ableToOpen = openInputFile(fastaFileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + + if (ableToOpen == 1) { + m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + fastaFileNames.erase(fastaFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } } - - - //check for required parameters - fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.bellerophon command."); m->mothurOutEndLine(); abort = true; } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -89,6 +124,7 @@ void ChimeraBellerophonCommand::help(){ try { m->mothurOut("The chimera.bellerophon command reads a fastafile and creates list of potentially chimeric sequences.\n"); m->mothurOut("The chimera.bellerophon command parameters are fasta, filter, correction, processors, window, increment. The fasta parameter is required.\n"); + m->mothurOut("The fasta parameter is required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter, default=false. \n"); m->mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs, default=true.\n"); m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"); @@ -98,7 +134,7 @@ void ChimeraBellerophonCommand::help(){ m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default is 1/4 sequence length. \n"); m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default is 25.\n"); m->mothurOut("chimera.bellerophon(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors) \n"); - m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, window=200) \n"); + m->mothurOut("Example: chimera.bellerophon(fasta=AD.align, filter=True, correction=true, window=200) \n"); m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n"); } catch(exception& e) { @@ -118,74 +154,73 @@ int ChimeraBellerophonCommand::execute(){ if (abort == true) { return 0; } - int start = time(NULL); - - chimera = new Bellerophon(fastafile, filter, correction, window, increment, processors, outputDir); - - string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "bellerophon.chimeras"; - string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "bellerophon.accnos"; - bool hasAccnos = true; - - chimera->getChimeras(); - - if (m->control_pressed) { delete chimera; return 0; } - - #ifdef USE_MPI - MPI_File outMPI; - MPI_File outMPIAccnos; - - int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - - //char* outFilename = new char[accnosFileName.length()]; - //memcpy(outFilename, accnosFileName.c_str(), accnosFileName.length()); - - char outFilename[1024]; - strcpy(outFilename, accnosFileName.c_str()); - - //char* FileName = new char[outputFileName.length()]; - //memcpy(FileName, outputFileName.c_str(), outputFileName.length()); - - char FileName[1024]; - strcpy(FileName, outputFileName.c_str()); - - MPI_File_open(MPI_COMM_WORLD, FileName, outMode, MPI_INFO_NULL, &outMPI); //comm, filename, mode, info, filepointer - MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); - - //delete FileName; - //delete outFilename; + for (int i = 0; i < fastaFileNames.size(); i++) { + + m->mothurOut("Checking sequences from " + fastaFileNames[i] + " ..." ); m->mothurOutEndLine(); + + int start = time(NULL); + + chimera = new Bellerophon(fastaFileNames[i], filter, correction, window, increment, processors, outputDir); + + string outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "bellerophon.chimeras"; + string accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "bellerophon.accnos"; + bool hasAccnos = true; + + chimera->getChimeras(); + + if (m->control_pressed) { delete chimera; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + #ifdef USE_MPI + MPI_File outMPI; + MPI_File outMPIAccnos; + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + + char outFilename[1024]; + strcpy(outFilename, accnosFileName.c_str()); - numSeqs = chimera->print(outMPI, outMPIAccnos); - - MPI_File_close(&outMPI); - MPI_File_close(&outMPIAccnos); + char FileName[1024]; + strcpy(FileName, outputFileName.c_str()); - #else + MPI_File_open(MPI_COMM_WORLD, FileName, outMode, MPI_INFO_NULL, &outMPI); //comm, filename, mode, info, filepointer + MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); - ofstream out; - openOutputFile(outputFileName, out); - - ofstream out2; - openOutputFile(accnosFileName, out2); - - numSeqs = chimera->print(out, out2); - out.close(); - out2.close(); - - #endif - - if (m->control_pressed) { remove(accnosFileName.c_str()); remove(outputFileName.c_str()); delete chimera; return 0; } + numSeqs = chimera->print(outMPI, outMPIAccnos); + + MPI_File_close(&outMPI); + MPI_File_close(&outMPIAccnos); + + #else - //delete accnos file if its blank - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + ofstream out; + openOutputFile(outputFileName, out); + + ofstream out2; + openOutputFile(accnosFileName, out2); + + numSeqs = chimera->print(out, out2); + out.close(); + out2.close(); + + #endif + + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(outputFileName.c_str()); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete chimera; return 0; } + + //delete accnos file if its blank + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); + + outputNames.push_back(outputFileName); + if (hasAccnos) { outputNames.push_back(accnosFileName); } + + delete chimera; + } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); - if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); } + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); - - delete chimera; return 0; diff --git a/chimerabellerophoncommand.h b/chimerabellerophoncommand.h index e450b52..4ff7f79 100644 --- a/chimerabellerophoncommand.h +++ b/chimerabellerophoncommand.h @@ -30,6 +30,8 @@ private: string fastafile, outputDir; int processors, window, increment, numSeqs; Chimera* chimera; + vector outputNames; + vector fastaFileNames; }; /***********************************************************/ diff --git a/chimeraccodecommand.cpp b/chimeraccodecommand.cpp index 280fd8f..2b46f2b 100644 --- a/chimeraccodecommand.cpp +++ b/chimeraccodecommand.cpp @@ -40,14 +40,6 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option) { if (inputDir == "not found"){ inputDir = ""; } else { string path; - it = parameters.find("fasta"); - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["fasta"] = inputDir + it->second; } - } - it = parameters.find("template"); //user has given a template file if(it != parameters.end()){ @@ -57,11 +49,56 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option) { } } - //check for required parameters - fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true; } + fastafile = validParameter.validFile(parameters, "fasta", false); + if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(fastafile, fastaFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < fastaFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(fastaFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ableToOpen = openInputFile(fastaFileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + + if (ableToOpen == 1) { + m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + fastaFileNames.erase(fastaFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -117,6 +154,7 @@ void ChimeraCcodeCommand::help(){ m->mothurOut("This command was created using the algorythms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n"); m->mothurOut("The chimera.ccode command parameters are fasta, template, filter, mask, processors, window and numwanted.\n"); m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n"); + m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n"); m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n"); m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"); @@ -128,7 +166,7 @@ void ChimeraCcodeCommand::help(){ m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n"); m->mothurOut("The chimera.ccode command should be in the following format: \n"); m->mothurOut("chimera.ccode(fasta=yourFastaFile, template=yourTemplate) \n"); - m->mothurOut("Example: chimera.seqs(fasta=AD.align, template=core_set_aligned.imputed.fasta) \n"); + m->mothurOut("Example: chimera.ccode(fasta=AD.align, template=core_set_aligned.imputed.fasta) \n"); m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n"); } catch(exception& e) { @@ -148,293 +186,283 @@ int ChimeraCcodeCommand::execute(){ if (abort == true) { return 0; } - int start = time(NULL); - - //set user options - if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); } - - chimera = new Ccode(fastafile, templatefile, filter, maskfile, window, numwanted, outputDir); - - //is your template aligned? - if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; } - templateSeqsLength = chimera->getLength(); - - string outputFileName, accnosFileName; - if (maskfile != "") { - outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".ccode.chimeras"; - accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".ccode.accnos"; - }else { - outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "ccode.chimeras"; - accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "ccode.accnos"; - } - - string mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo"; - bool hasAccnos = true; - - if (m->control_pressed) { delete chimera; return 0; } + for (int s = 0; s < fastaFileNames.size(); s++) { + + m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); - #ifdef USE_MPI - - int pid, end, numSeqsPerProcessor; - int tag = 2001; - vector MPIPos; - MPIWroteAccnos = false; - - MPI_Status status; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - MPI_File inMPI; - MPI_File outMPI; - MPI_File outMPIAccnos; + int start = time(NULL); - int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - int inMode=MPI_MODE_RDONLY; + //set user options + if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); } - //char* outFilename = new char[outputFileName.length()]; - //memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); - - char outFilename[1024]; - strcpy(outFilename, outputFileName.c_str()); + chimera = new Ccode(fastaFileNames[s], templatefile, filter, maskfile, window, numwanted, outputDir); - //char* outAccnosFilename = new char[accnosFileName.length()]; - //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); + //is your template aligned? + if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; } + templateSeqsLength = chimera->getLength(); - char outAccnosFilename[1024]; - strcpy(outAccnosFilename, accnosFileName.c_str()); + string outputFileName, accnosFileName; + if (maskfile != "") { + outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.chimeras"; + accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.accnos"; + }else { + outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "ccode.chimeras"; + accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "ccode.accnos"; + } - //char* inFileName = new char[fastafile.length()]; - //memcpy(inFileName, fastafile.c_str(), fastafile.length()); + string mapInfo = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "mapinfo"; + bool hasAccnos = true; - char inFileName[1024]; - strcpy(inFileName, fastafile.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); - MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - //delete inFileName; - //delete outFilename; - //delete outAccnosFilename; - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } + #ifdef USE_MPI - if (pid == 0) { //you are the root process - string outTemp = "For full window mapping info refer to " + mapInfo + "\n\n"; + int pid, end, numSeqsPerProcessor; + int tag = 2001; + vector MPIPos; + MPIWroteAccnos = false; - //print header - int length = outTemp.length(); - char* buf2 = new char[length]; - memcpy(buf2, outTemp.c_str(), length); + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); - MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status); - delete buf2; - - MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs + MPI_File inMPI; + MPI_File outMPI; + MPI_File outMPIAccnos; - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); - } + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + int inMode=MPI_MODE_RDONLY; - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + char outFilename[1024]; + strcpy(outFilename, outputFileName.c_str()); + char outAccnosFilename[1024]; + strcpy(outAccnosFilename, accnosFileName.c_str()); + + char inFileName[1024]; + strcpy(inFileName, fastaFileNames[s].c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); + MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + if (pid == 0) { //you are the root process + string outTemp = "For full window mapping info refer to " + mapInfo + "\n\n"; + + //print header + int length = outTemp.length(); + char* buf2 = new char[length]; + memcpy(buf2, outTemp.c_str(), length); + + MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status); + delete buf2; + + MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; } + //align your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } - for (int i = 1; i < processors; i++) { - bool tempResult; - MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); - if (tempResult != 0) { MPIWroteAccnos = true; } + for (int i = 1; i < processors; i++) { + bool tempResult; + MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); + if (tempResult != 0) { MPIWroteAccnos = true; } + } + }else{ //you are a child process + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + MPIPos.resize(numSeqs+1); + MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + + + //align your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); } - }else{ //you are a child process - MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - MPIPos.resize(numSeqs+1); - MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); - - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + //close files + MPI_File_close(&inMPI); + MPI_File_close(&outMPI); + MPI_File_close(&outMPIAccnos); - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } - - MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); - } - - //close files - MPI_File_close(&inMPI); - MPI_File_close(&outMPI); - MPI_File_close(&outMPIAccnos); + //delete accnos file if blank + if (pid == 0) { + if (!MPIWroteAccnos) { + hasAccnos = false; + remove(accnosFileName.c_str()); + } + } + + #else + ofstream outHeader; + string tempHeader = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + "ccode.chimeras.tempHeader"; + openOutputFile(tempHeader, outHeader); - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case + outHeader << "For full window mapping info refer to " << mapInfo << endl << endl; + + outHeader.close(); - //delete accnos file if blank - if (pid == 0) { - if (!MPIWroteAccnos) { - //MPI_Info info; - //MPI_File_delete(outAccnosFilename, info); - hasAccnos = false; - remove(accnosFileName.c_str()); + //break up file + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + ifstream inFASTA; + openInputFile(fastaFileNames[s], inFASTA); + getNumSeqs(inFASTA, numSeqs); + inFASTA.close(); + + lines.push_back(new linePair(0, numSeqs)); + + driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); + + if (m->control_pressed) { + remove(outputFileName.c_str()); + remove(tempHeader.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; + } + + //delete accnos file if its blank + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + + }else{ + vector positions; + processIDS.resize(0); + + ifstream inFASTA; + openInputFile(fastaFileNames[s], inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + numSeqs = positions.size(); + + int numSeqsPerProcessor = numSeqs / processors; + + for (int i = 0; i < processors; i++) { + long int startPos = positions[ i * numSeqsPerProcessor ]; + if(i == processors - 1){ + numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + + createProcesses(outputFileName, fastaFileNames[s], accnosFileName); + + rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); + + //append output files + for(int i=1;i nonBlankAccnosFiles; + //delete blank accnos files generated with multiple processes + for(int i=0;icontrol_pressed) { + remove(outputFileName.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; + } + } - } - - #else - ofstream outHeader; - string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + "ccode.chimeras.tempHeader"; - openOutputFile(tempHeader, outHeader); - - outHeader << "For full window mapping info refer to " << mapInfo << endl << endl; - outHeader.close(); - - //break up file - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ + #else ifstream inFASTA; - openInputFile(fastafile, inFASTA); + openInputFile(fastaFileNames[s], inFASTA); getNumSeqs(inFASTA, numSeqs); inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); - driver(lines[0], outputFileName, fastafile, accnosFileName); + driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); if (m->control_pressed) { - remove(outputFileName.c_str()); - remove(tempHeader.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; + remove(outputFileName.c_str()); + remove(tempHeader.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; } //delete accnos file if its blank if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } - - }else{ - vector positions; - processIDS.resize(0); - - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } - } - } - inFASTA.close(); - - numSeqs = positions.size(); - - int numSeqsPerProcessor = numSeqs / processors; - - for (int i = 0; i < processors; i++) { - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } - - - createProcesses(outputFileName, fastafile, accnosFileName); - - rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); - - //append output files - for(int i=1;i nonBlankAccnosFiles; - //delete blank accnos files generated with multiple processes - for(int i=0;icontrol_pressed) { - remove(outputFileName.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } - - } - - #else - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - getNumSeqs(inFASTA, numSeqs); - inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); + #endif + + appendFiles(outputFileName, tempHeader); + + remove(outputFileName.c_str()); + rename(tempHeader.c_str(), outputFileName.c_str()); + #endif + + delete chimera; - driver(lines[0], outputFileName, fastafile, accnosFileName); + outputNames.push_back(outputFileName); + outputNames.push_back(mapInfo); + if (hasAccnos) { outputNames.push_back(accnosFileName); } - if (m->control_pressed) { - remove(outputFileName.c_str()); - remove(tempHeader.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - //delete accnos file if its blank - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } - #endif - - //m->mothurOut("Output File Names: "); - //if ((filter) && (method == "bellerophon")) { m->mothurOut( - //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; } - // else { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; } - - appendFiles(outputFileName, tempHeader); - - remove(outputFileName.c_str()); - rename(tempHeader.c_str(), outputFileName.c_str()); - #endif - - delete chimera; - + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); - if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); } + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); - - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); return 0; diff --git a/chimeraccodecommand.h b/chimeraccodecommand.h index 9a0efb9..a07178e 100644 --- a/chimeraccodecommand.h +++ b/chimeraccodecommand.h @@ -46,6 +46,8 @@ private: string fastafile, templatefile, outputDir, maskfile; int processors, window, numwanted, numSeqs, templateSeqsLength; Chimera* chimera; + vector fastaFileNames; + vector outputNames; }; diff --git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp index 4947dc9..25f9c0f 100644 --- a/chimeracheckcommand.cpp +++ b/chimeracheckcommand.cpp @@ -39,51 +39,128 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option) { string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { - string path; - it = parameters.find("fasta"); - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["fasta"] = inputDir + it->second; } - } - it = parameters.find("template"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + string path = hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["template"] = inputDir + it->second; } } - - it = parameters.find("name"); - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["name"] = inputDir + it->second; } - } } - //check for required parameters - fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; } + fastafile = validParameter.validFile(parameters, "fasta", false); + if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(fastafile, fastaFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < fastaFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(fastaFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ableToOpen = openInputFile(fastaFileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + + if (ableToOpen == 1) { + m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + fastaFileNames.erase(fastaFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it - } + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } templatefile = validParameter.validFile(parameters, "template", true); if (templatefile == "not open") { abort = true; } else if (templatefile == "not found") { templatefile = ""; m->mothurOut("template is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; } - namefile = validParameter.validFile(parameters, "name", true); - if (namefile == "not open") { abort = true; } - else if (namefile == "not found") { namefile = ""; } + namefile = validParameter.validFile(parameters, "name", false); + if (namefile == "not found") { namefile = ""; } + else { + splitAtDash(namefile, nameFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < nameFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(nameFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ableToOpen = openInputFile(nameFileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + + if (ableToOpen == 1) { + m->mothurOut(nameFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + nameFileNames.erase(nameFileNames.begin()+i); + i--; + } + + } + + //make sure there is at least one valid file left + if (nameFileNames.size() != 0) { + if (nameFileNames.size() != fastaFileNames.size()) { + m->mothurOut("Different number of valid name files and fasta files, aborting command."); m->mothurOutEndLine(); + abort = true; + } + } + } string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); @@ -93,6 +170,7 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option) { temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; } svg = isTrue(temp); + if (nameFileNames.size() != 0) { svg = true; } temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "10"; } convert(temp, increment); @@ -112,6 +190,7 @@ void ChimeraCheckCommand::help(){ m->mothurOut("This command was created using the algorythms described in CHIMERA_CHECK version 2.7 written by Niels Larsen. \n"); m->mothurOut("The chimera.check command parameters are fasta, template, processors, ksize, increment, svg and name.\n"); m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n"); + m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n"); m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"); #ifdef USE_MPI @@ -121,6 +200,7 @@ void ChimeraCheckCommand::help(){ m->mothurOut("The ksize parameter allows you to input kmersize, default is 7. \n"); m->mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n"); m->mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n"); + m->mothurOut("You may enter multiple name files by separating their names with dashes. ie. fasta=abrecovery.svg.names-amzon.svg.names \n"); m->mothurOut("The chimera.check command should be in the following format: \n"); m->mothurOut("chimera.check(fasta=yourFastaFile, template=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n"); m->mothurOut("Example: chimera.check(fasta=AD.fasta, template=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n"); @@ -143,197 +223,197 @@ int ChimeraCheckCommand::execute(){ if (abort == true) { return 0; } - int start = time(NULL); - - chimera = new ChimeraCheckRDP(fastafile, templatefile, namefile, svg, increment, ksize, outputDir); - - if (m->control_pressed) { delete chimera; return 0; } - - string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "chimeracheck.chimeras"; - - #ifdef USE_MPI - - int pid, end, numSeqsPerProcessor; - int tag = 2001; - vector MPIPos; + for (int i = 0; i < fastaFileNames.size(); i++) { + + m->mothurOut("Checking sequences from " + fastaFileNames[i] + " ..." ); m->mothurOutEndLine(); - MPI_Status status; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - MPI_File inMPI; - MPI_File outMPI; - - int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - int inMode=MPI_MODE_RDONLY; + int start = time(NULL); - //char* outFilename = new char[outputFileName.length()]; - //memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); + string thisNameFile = ""; + if (nameFileNames.size() != 0) { thisNameFile = nameFileNames[i]; } - char outFilename[1024]; - strcpy(outFilename, outputFileName.c_str()); + chimera = new ChimeraCheckRDP(fastaFileNames[i], templatefile, thisNameFile, svg, increment, ksize, outputDir); - //char* inFileName = new char[fastafile.length()]; - //memcpy(inFileName, fastafile.c_str(), fastafile.length()); + if (m->control_pressed) { delete chimera; return 0; } - char inFileName[1024]; - strcpy(inFileName, fastafile.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); + string outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "chimeracheck.chimeras"; + outputNames.push_back(outputFileName); - //delete outFilename; - //delete inFileName; + #ifdef USE_MPI + + int pid, end, numSeqsPerProcessor; + int tag = 2001; + vector MPIPos; + + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); delete chimera; return 0; } + MPI_File inMPI; + MPI_File outMPI; + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + int inMode=MPI_MODE_RDONLY; + + char outFilename[1024]; + strcpy(outFilename, outputFileName.c_str()); - if (pid == 0) { //you are the root process - MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs - - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); - } - - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + char inFileName[1024]; + strcpy(inFileName, fastaFileNames[i].c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); - - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); remove(outputFileName.c_str()); delete chimera; return 0; } + if (pid == 0) { //you are the root process + MPIPos = setFilePosFasta(fastaFileNames[i], numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int j = 1; j < processors; j++) { + MPI_Send(&numSeqs, 1, MPI_INT, j, tag, MPI_COMM_WORLD); + MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, j, tag, MPI_COMM_WORLD); + } + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + - //wait on chidren - for(int i = 1; i < processors; i++) { + //align your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + //wait on chidren + for(int j = 1; j < processors; j++) { + char buf[4]; + MPI_Recv(buf, 4, MPI_CHAR, j, tag, MPI_COMM_WORLD, &status); + } + }else{ //you are a child process + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + MPIPos.resize(numSeqs+1); + MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + + //align your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + //tell parent you are done. char buf[4]; - MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); + strcpy(buf, "done"); + MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } - }else{ //you are a child process - MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - MPIPos.resize(numSeqs+1); - MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); - - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } - - - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); delete chimera; return 0; } - - //tell parent you are done. - char buf[4]; - strcpy(buf, "done"); - MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD); - } + //close files + MPI_File_close(&inMPI); + MPI_File_close(&outMPI); + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case + #else - //close files - MPI_File_close(&inMPI); - MPI_File_close(&outMPI); - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case - #else - - //break up file - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ + //break up file + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + ifstream inFASTA; + openInputFile(fastaFileNames[i], inFASTA); + getNumSeqs(inFASTA, numSeqs); + inFASTA.close(); + + lines.push_back(new linePair(0, numSeqs)); + + driver(lines[0], outputFileName, fastaFileNames[i]); + + if (m->control_pressed) { + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); + delete chimera; + return 0; + } + + }else{ + vector positions; + processIDS.resize(0); + + ifstream inFASTA; + openInputFile(fastaFileNames[i], inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + numSeqs = positions.size(); + + int numSeqsPerProcessor = numSeqs / processors; + + for (int j = 0; j < processors; j++) { + long int startPos = positions[ j * numSeqsPerProcessor ]; + if(j == processors - 1){ + numSeqsPerProcessor = numSeqs - j * numSeqsPerProcessor; + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + + createProcesses(outputFileName, fastaFileNames[i]); + + rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); + + //append output files + for(int j=1;jcontrol_pressed) { + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); + delete chimera; + return 0; + } + } + + #else ifstream inFASTA; - openInputFile(fastafile, inFASTA); + openInputFile(fastaFileNames[i], inFASTA); getNumSeqs(inFASTA, numSeqs); inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); - driver(lines[0], outputFileName, fastafile); + driver(lines[0], outputFileName, fastaFileNames[i]); if (m->control_pressed) { - remove(outputFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } - - }else{ - vector positions; - processIDS.resize(0); - - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } - } + for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + delete chimera; + return 0; } - inFASTA.close(); - - numSeqs = positions.size(); - - int numSeqsPerProcessor = numSeqs / processors; - - for (int i = 0; i < processors; i++) { - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } - - - createProcesses(outputFileName, fastafile); + #endif + #endif + delete chimera; + for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); - rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); - - //append output files - for(int i=1;icontrol_pressed) { - remove(outputFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } - } + m->mothurOutEndLine(); m->mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine(); + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); - #else - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - getNumSeqs(inFASTA, numSeqs); - inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); - - driver(lines[0], outputFileName, fastafile); - - if (m->control_pressed) { - remove(outputFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } - #endif - #endif - delete chimera; - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - - m->mothurOutEndLine(); m->mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine(); + } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); - + return 0; } diff --git a/chimeracheckcommand.h b/chimeracheckcommand.h index 6db61bd..b3b49d3 100644 --- a/chimeracheckcommand.h +++ b/chimeracheckcommand.h @@ -46,8 +46,10 @@ private: string fastafile, templatefile, namefile, outputDir; int processors, increment, ksize, numSeqs, templateSeqsLength; Chimera* chimera; - - + vector fastaFileNames; + vector nameFileNames; + vector outputNames; + }; /***********************************************************/ diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index c43dd38..4cfc3bc 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -36,18 +36,10 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option) { } //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); + inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { string path; - it = parameters.find("fasta"); - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["fasta"] = inputDir + it->second; } - } - it = parameters.find("template"); //user has given a template file if(it != parameters.end()){ @@ -75,27 +67,68 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option) { //check for required parameters - fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; } - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it - } + fastafile = validParameter.validFile(parameters, "fasta", false); + if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(fastafile, fastaFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < fastaFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(fastaFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif - templatefile = validParameter.validFile(parameters, "template", true); - if (templatefile == "not open") { abort = true; } - else if (templatefile == "not found") { templatefile = ""; m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; } + ableToOpen = openInputFile(fastaFileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + + if (ableToOpen == 1) { + m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + fastaFileNames.erase(fastaFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } - consfile = validParameter.validFile(parameters, "conservation", true); - if (consfile == "not open") { abort = true; } - else if (consfile == "not found") { consfile = ""; } + string temp; + temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; } + filter = isTrue(temp); - quanfile = validParameter.validFile(parameters, "quantile", true); - if (quanfile == "not open") { abort = true; } - else if (quanfile == "not found") { quanfile = ""; } + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } + convert(temp, processors); + + temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; } + convert(temp, window); + + temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; } + convert(temp, increment); maskfile = validParameter.validFile(parameters, "mask", false); if (maskfile == "not found") { maskfile = ""; } @@ -111,19 +144,31 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option) { if (ableToOpen == 1) { abort = true; } in.close(); } - - string temp; - temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; } - filter = isTrue(temp); + - temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } - convert(temp, processors); + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it + } + + templatefile = validParameter.validFile(parameters, "template", true); + if (templatefile == "not open") { abort = true; } + else if (templatefile == "not found") { templatefile = ""; m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; } - temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; } - convert(temp, window); + consfile = validParameter.validFile(parameters, "conservation", true); + if (consfile == "not open") { abort = true; } + else if (consfile == "not found") { + consfile = ""; + //check for consfile + string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq"; + ifstream FileTest(tempConsFile.c_str()); + if(FileTest){ m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close(); } + } - temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; } - convert(temp, increment); + quanfile = validParameter.validFile(parameters, "quantile", true); + if (quanfile == "not open") { abort = true; } + else if (quanfile == "not found") { quanfile = ""; } } } catch(exception& e) { @@ -140,6 +185,7 @@ void ChimeraPintailCommand::help(){ m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n"); m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n"); m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n"); + m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n"); m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n"); m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n"); @@ -152,8 +198,8 @@ void ChimeraPintailCommand::help(){ m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n"); m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n"); m->mothurOut("The chimera.pintail command should be in the following format: \n"); - m->mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n"); - m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n"); + m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n"); + m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n"); m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n"); } catch(exception& e) { @@ -173,262 +219,271 @@ int ChimeraPintailCommand::execute(){ if (abort == true) { return 0; } - int start = time(NULL); - - //set user options - if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); } - - chimera = new Pintail(fastafile, templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir); - - string outputFileName, accnosFileName; - if (maskfile != "") { - outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.chimeras"; - accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.accnos"; - }else { - outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "pintail.chimeras"; - accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "pintail.accnos"; - } - bool hasAccnos = true; - - if (m->control_pressed) { delete chimera; return 0; } - - if (chimera->getUnaligned()) { - m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); - delete chimera; - return 0; - } - templateSeqsLength = chimera->getLength(); - - #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; - int tag = 2001; - vector MPIPos; - MPIWroteAccnos = false; - - MPI_Status status; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); + for (int s = 0; s < fastaFileNames.size(); s++) { + + m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); - MPI_File inMPI; - MPI_File outMPI; - MPI_File outMPIAccnos; - - int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - int inMode=MPI_MODE_RDONLY; - - //char* outFilename = new char[outputFileName.length()]; - //memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); + int start = time(NULL); + + //set user options + if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); } + + //check for quantile to save the time + string tempQuan = ""; + if ((!filter) && (maskfile == "")) { + tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.quan"; + }else if ((!filter) && (maskfile != "")) { + tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.masked.quan"; + }else if ((filter) && (maskfile != "")) { + tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "masked.quan"; + }else if ((filter) && (maskfile == "")) { + tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "quan"; + } - char outFilename[1024]; - strcpy(outFilename, outputFileName.c_str()); + ifstream FileTest(tempQuan.c_str()); + if(FileTest){ m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close(); } - //char* outAccnosFilename = new char[accnosFileName.length()]; - //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); + chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir); - char outAccnosFilename[1024]; - strcpy(outAccnosFilename, accnosFileName.c_str()); - - //char* inFileName = new char[fastafile.length()]; - //memcpy(inFileName, fastafile.c_str(), fastafile.length()); + string outputFileName, accnosFileName; + if (maskfile != "") { + outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.chimeras"; + accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.accnos"; + }else { + outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "pintail.chimeras"; + accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "pintail.accnos"; + } + bool hasAccnos = true; - char inFileName[1024]; - strcpy(inFileName, fastafile.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); - MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - //delete inFileName; - //delete outFilename; - //delete outAccnosFilename; - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } + if (chimera->getUnaligned()) { + m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); + delete chimera; + return 0; + } + templateSeqsLength = chimera->getLength(); + + #ifdef USE_MPI + int pid, end, numSeqsPerProcessor; + int tag = 2001; + vector MPIPos; + MPIWroteAccnos = false; + + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); - if (pid == 0) { //you are the root process - - MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs + MPI_File inMPI; + MPI_File outMPI; + MPI_File outMPIAccnos; - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); - } + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + int inMode=MPI_MODE_RDONLY; - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + char outFilename[1024]; + strcpy(outFilename, outputFileName.c_str()); - - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; } - - for (int i = 1; i < processors; i++) { - bool tempResult; - MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); - if (tempResult != 0) { MPIWroteAccnos = true; } - } - }else{ //you are a child process - MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - MPIPos.resize(numSeqs+1); - MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + char outAccnosFilename[1024]; + strcpy(outAccnosFilename, accnosFileName.c_str()); - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + char inFileName[1024]; + strcpy(inFileName, fastaFileNames[s].c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); + MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + if (pid == 0) { //you are the root process + + MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + //do your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + for (int i = 1; i < processors; i++) { + bool tempResult; + MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); + if (tempResult != 0) { MPIWroteAccnos = true; } + } + }else{ //you are a child process + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + MPIPos.resize(numSeqs+1); + MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + + //do your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); + } - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } + //close files + MPI_File_close(&inMPI); + MPI_File_close(&outMPI); + MPI_File_close(&outMPIAccnos); + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case + + //delete accnos file if blank + if (pid == 0) { + if (!MPIWroteAccnos) { + hasAccnos = false; + remove(accnosFileName.c_str()); + } + } - MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); - } - - //close files - MPI_File_close(&inMPI); - MPI_File_close(&outMPI); - MPI_File_close(&outMPIAccnos); - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case - - //delete accnos file if blank - if (pid == 0) { - if (!MPIWroteAccnos) { - //MPI_Info info; - //MPI_File_delete(outAccnosFilename, info); - hasAccnos = false; - remove(accnosFileName.c_str()); + #else + + //break up file + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + ifstream inFASTA; + openInputFile(fastaFileNames[s], inFASTA); + getNumSeqs(inFASTA, numSeqs); + inFASTA.close(); + + lines.push_back(new linePair(0, numSeqs)); + + driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); + + if (m->control_pressed) { + remove(outputFileName.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; + } + + //delete accnos file if its blank + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + + }else{ + vector positions; + processIDS.resize(0); + + ifstream inFASTA; + openInputFile(fastaFileNames[s], inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + numSeqs = positions.size(); + + int numSeqsPerProcessor = numSeqs / processors; + + for (int i = 0; i < processors; i++) { + long int startPos = positions[ i * numSeqsPerProcessor ]; + if(i == processors - 1){ + numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + createProcesses(outputFileName, fastaFileNames[s], accnosFileName); + + rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); + + //append output files + for(int i=1;i nonBlankAccnosFiles; + //delete blank accnos files generated with multiple processes + for(int i=0;icontrol_pressed) { + remove(outputFileName.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; + } } - } - #else - - //break up file - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ + #else ifstream inFASTA; - openInputFile(fastafile, inFASTA); + openInputFile(fastaFileNames[s], inFASTA); getNumSeqs(inFASTA, numSeqs); inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); - driver(lines[0], outputFileName, fastafile, accnosFileName); + driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); if (m->control_pressed) { - remove(outputFileName.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; + remove(outputFileName.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; } //delete accnos file if its blank if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } - - }else{ - vector positions; - processIDS.resize(0); - - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } - } - } - inFASTA.close(); - - numSeqs = positions.size(); - - int numSeqsPerProcessor = numSeqs / processors; - - for (int i = 0; i < processors; i++) { - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } - - - createProcesses(outputFileName, fastafile, accnosFileName); - - rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); - - //append output files - for(int i=1;i nonBlankAccnosFiles; - //delete blank accnos files generated with multiple processes - for(int i=0;icontrol_pressed) { - remove(outputFileName.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } - } - - #else - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - getNumSeqs(inFASTA, numSeqs); - inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); + #endif - driver(lines[0], outputFileName, fastafile, accnosFileName); + #endif + + delete chimera; + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - if (m->control_pressed) { - remove(outputFileName.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } + outputNames.push_back(outputFileName); + if (hasAccnos) { outputNames.push_back(accnosFileName); } - //delete accnos file if its blank - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } - #endif - - #endif - - delete chimera; - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + m->mothurOutEndLine(); + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); - if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); } + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); - + return 0; } diff --git a/chimerapintailcommand.h b/chimerapintailcommand.h index 0ddfc1c..b0dcceb 100644 --- a/chimerapintailcommand.h +++ b/chimerapintailcommand.h @@ -44,9 +44,11 @@ private: #endif bool abort, filter, MPIWroteAccnos; - string fastafile, templatefile, consfile, quanfile, maskfile, outputDir; + string fastafile, templatefile, consfile, quanfile, maskfile, outputDir, inputDir; int processors, window, increment, numSeqs, templateSeqsLength; Chimera* chimera; + vector outputNames; + vector fastaFileNames; }; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 004cfb0..35b0433 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -42,14 +42,6 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { if (inputDir == "not found"){ inputDir = ""; } else { string path; - it = parameters.find("fasta"); - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["fasta"] = inputDir + it->second; } - } - it = parameters.find("template"); //user has given a template file if(it != parameters.end()){ @@ -61,9 +53,55 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { //check for required parameters - fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true; } + fastafile = validParameter.validFile(parameters, "fasta", false); + if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(fastafile, fastaFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < fastaFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(fastaFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ableToOpen = openInputFile(fastaFileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + + if (ableToOpen == 1) { + m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + fastaFileNames.erase(fastaFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -139,6 +177,7 @@ void ChimeraSlayerCommand::help(){ m->mothurOut("This command was modeled after the chimeraSlayer written by the Broad Institute.\n"); m->mothurOut("The chimera.slayer command parameters are fasta, template, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign, m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n"); + m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n"); m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"); #ifdef USE_MPI @@ -158,7 +197,7 @@ void ChimeraSlayerCommand::help(){ m->mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n"); m->mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n"); m->mothurOut("The search parameter allows you to specify search method for finding the closest parent. Choices are distance, blast, and kmer, default distance. \n"); - //m->mothurOut("The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default false. Found to make results worse. \n"); + m->mothurOut("The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default false. \n"); m->mothurOut("The chimera.slayer command should be in the following format: \n"); m->mothurOut("chimera.slayer(fasta=yourFastaFile, template=yourTemplate, search=yourSearch) \n"); m->mothurOut("Example: chimera.slayer(fasta=AD.align, template=core_set_aligned.imputed.fasta, search=kmer) \n"); @@ -181,279 +220,274 @@ int ChimeraSlayerCommand::execute(){ if (abort == true) { return 0; } - int start = time(NULL); - - chimera = new ChimeraSlayer(fastafile, templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); - - string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.chimeras"; - string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.accnos"; - bool hasAccnos = true; - - if (m->control_pressed) { delete chimera; return 0; } - - if (chimera->getUnaligned()) { - m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); - delete chimera; - return 0; - } - templateSeqsLength = chimera->getLength(); + for (int s = 0; s < fastaFileNames.size(); s++) { + + m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); - #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; - int tag = 2001; - vector MPIPos; - MPIWroteAccnos = false; - - MPI_Status status; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - MPI_File inMPI; - MPI_File outMPI; - MPI_File outMPIAccnos; + int start = time(NULL); - int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - int inMode=MPI_MODE_RDONLY; + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); + + string outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "slayer.chimeras"; + string accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "slayer.accnos"; + bool hasAccnos = true; - //char* outFilename = new char[outputFileName.length()]; - //memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); + if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - char outFilename[1024]; - strcpy(outFilename, outputFileName.c_str()); - - //char* outAccnosFilename = new char[accnosFileName.length()]; - //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); + if (chimera->getUnaligned()) { + m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); + delete chimera; + return 0; + } + templateSeqsLength = chimera->getLength(); - char outAccnosFilename[1024]; - strcpy(outAccnosFilename, accnosFileName.c_str()); + #ifdef USE_MPI + int pid, end, numSeqsPerProcessor; + int tag = 2001; + vector MPIPos; + MPIWroteAccnos = false; + + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); - //char* inFileName = new char[fastafile.length()]; - //memcpy(inFileName, fastafile.c_str(), fastafile.length()); - - char inFileName[1024]; - strcpy(inFileName, fastafile.c_str()); + MPI_File inMPI; + MPI_File outMPI; + MPI_File outMPIAccnos; + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + int inMode=MPI_MODE_RDONLY; + + char outFilename[1024]; + strcpy(outFilename, outputFileName.c_str()); + + char outAccnosFilename[1024]; + strcpy(outAccnosFilename, accnosFileName.c_str()); + + char inFileName[1024]; + strcpy(inFileName, fastaFileNames[s].c_str()); - MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); - MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); - - //delete inFileName; - //delete outFilename; - //delete outAccnosFilename; + MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); + MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + if (pid == 0) { //you are the root process + m->mothurOutEndLine(); + m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results."); + m->mothurOutEndLine(); - if (pid == 0) { //you are the root process - m->mothurOutEndLine(); - m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results."); - m->mothurOutEndLine(); - - string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; - - //print header - int length = outTemp.length(); - char* buf2 = new char[length]; - memcpy(buf2, outTemp.c_str(), length); + string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; + + //print header + int length = outTemp.length(); + char* buf2 = new char[length]; + memcpy(buf2, outTemp.c_str(), length); - MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status); - delete buf2; + MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status); + delete buf2; - MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs + MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + //do your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; } + + for (int i = 1; i < processors; i++) { + bool tempResult; + MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); + if (tempResult != 0) { MPIWroteAccnos = true; } + } + }else{ //you are a child process + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + MPIPos.resize(numSeqs+1); + MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + + //figure out how many sequences you have to align + numSeqsPerProcessor = numSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + + //do your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + + MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); } - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } + //close files + MPI_File_close(&inMPI); + MPI_File_close(&outMPI); + MPI_File_close(&outMPIAccnos); + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case + //delete accnos file if blank + if (pid == 0) { + if (!MPIWroteAccnos) { + hasAccnos = false; + remove(accnosFileName.c_str()); + } + } - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + #else + ofstream outHeader; + string tempHeader = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "slayer.chimeras.tempHeader"; + openOutputFile(tempHeader, outHeader); + + chimera->printHeader(outHeader); + outHeader.close(); + + //break up file + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + ifstream inFASTA; + openInputFile(fastaFileNames[s], inFASTA); + getNumSeqs(inFASTA, numSeqs); + inFASTA.close(); + + lines.push_back(new linePair(0, numSeqs)); + + driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); + + if (m->control_pressed) { + remove(outputFileName.c_str()); + remove(tempHeader.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; + } + + //delete accnos file if its blank + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + + }else{ + vector positions; + processIDS.resize(0); + + ifstream inFASTA; + openInputFile(fastaFileNames[s], inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + numSeqs = positions.size(); + + int numSeqsPerProcessor = numSeqs / processors; + + for (int i = 0; i < processors; i++) { + long int startPos = positions[ i * numSeqsPerProcessor ]; + if(i == processors - 1){ + numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + createProcesses(outputFileName, fastaFileNames[s], accnosFileName); - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; } + rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); + + //append output files + for(int i=1;i nonBlankAccnosFiles; + //delete blank accnos files generated with multiple processes + for(int i=0;icontrol_pressed) { + remove(outputFileName.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; + } - for (int i = 1; i < processors; i++) { - bool tempResult; - MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); - if (tempResult != 0) { MPIWroteAccnos = true; } } - }else{ //you are a child process - MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - MPIPos.resize(numSeqs+1); - MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); - - //figure out how many sequences you have to align - numSeqsPerProcessor = numSeqs / processors; - int startIndex = pid * numSeqsPerProcessor; - if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } - - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } - MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); - } - - //close files - MPI_File_close(&inMPI); - MPI_File_close(&outMPI); - MPI_File_close(&outMPIAccnos); - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case - - //delete accnos file if blank - if (pid == 0) { - if (!MPIWroteAccnos) { - //MPI_Info info; - //MPI_File_delete(outAccnosFilename, info); - hasAccnos = false; - remove(accnosFileName.c_str()); - } - } - - #else - ofstream outHeader; - string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.chimeras.tempHeader"; - openOutputFile(tempHeader, outHeader); - - chimera->printHeader(outHeader); - outHeader.close(); - - //break up file - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ + #else ifstream inFASTA; - openInputFile(fastafile, inFASTA); + openInputFile(fastaFileNames[s], inFASTA); getNumSeqs(inFASTA, numSeqs); inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); - driver(lines[0], outputFileName, fastafile, accnosFileName); + driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); if (m->control_pressed) { - remove(outputFileName.c_str()); - remove(tempHeader.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; + remove(outputFileName.c_str()); + remove(tempHeader.c_str()); + remove(accnosFileName.c_str()); + for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete chimera; + return 0; } //delete accnos file if its blank if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } - - }else{ - vector positions; - processIDS.resize(0); - - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } - } - } - inFASTA.close(); - - numSeqs = positions.size(); - - int numSeqsPerProcessor = numSeqs / processors; - - for (int i = 0; i < processors; i++) { - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } - - createProcesses(outputFileName, fastafile, accnosFileName); + #endif - rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); - - //append output files - for(int i=1;i nonBlankAccnosFiles; - //delete blank accnos files generated with multiple processes - for(int i=0;icontrol_pressed) { - remove(outputFileName.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } - - } - - #else - ifstream inFASTA; - openInputFile(fastafile, inFASTA); - getNumSeqs(inFASTA, numSeqs); - inFASTA.close(); - lines.push_back(new linePair(0, numSeqs)); - - driver(lines[0], outputFileName, fastafile, accnosFileName); - - if (m->control_pressed) { - remove(outputFileName.c_str()); - remove(tempHeader.c_str()); - remove(accnosFileName.c_str()); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete chimera; - return 0; - } + appendFiles(outputFileName, tempHeader); + + remove(outputFileName.c_str()); + rename(tempHeader.c_str(), outputFileName.c_str()); - //delete accnos file if its blank - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } #endif - - appendFiles(outputFileName, tempHeader); - - remove(outputFileName.c_str()); - rename(tempHeader.c_str(), outputFileName.c_str()); - - #endif - delete chimera; + delete chimera; + + + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + + outputNames.push_back(outputFileName); + if (hasAccnos) { outputNames.push_back(accnosFileName); } + + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outputFileName); m->mothurOutEndLine(); - if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); } + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); - return 0; } diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index 926326b..7ae956a 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -48,6 +48,8 @@ private: float divR; Chimera* chimera; + vector outputNames; + vector fastaFileNames; };