X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=aligncommand.cpp;h=3f70e2d3733a2d40da2012d8ba31bff61c768b9b;hb=6e81846c8e5b2614f6b06643a9f558fb0e6669fa;hp=242e9a8fe0e3ba7a7f199846ae0e80cba10f0edb;hpb=6f9dc7ca71d7b8ed4aba2824f76ae6b29da89f3b;p=mothur.git diff --git a/aligncommand.cpp b/aligncommand.cpp index 242e9a8..3f70e2d 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -12,137 +12,839 @@ * allows for a global alignment and not the local alignment provided by bLAst. Furthermore, it has the potential to * provide a better alignment because of the banding method employed by blast (I'm not sure about this). * - * to compile type: - * make - * - * for basic instructions on how to run nastPlus type: - * ./nastPlus */ #include "aligncommand.h" #include "sequence.hpp" -#include "alignment.hpp" #include "gotohoverlap.hpp" #include "needlemanoverlap.hpp" #include "blastalign.hpp" #include "noalign.hpp" -#include "database.hpp" -#include "kmerdb.hpp" -#include "suffixdb.hpp" -#include "blastdb.hpp" - #include "nast.hpp" #include "nastreport.hpp" //********************************************************************************************************************** -AlignCommand::AlignCommand(){ + +AlignCommand::AlignCommand(string option) { try { - globaldata = GlobalData::getInstance(); - candidateFileName = globaldata->getCandidateFile(); - templateFileName = globaldata->getFastaFile(); - openInputFile(candidateFileName, in); - convert(globaldata->getKSize(), kmerSize); - convert(globaldata->getMatch(), match); - convert(globaldata->getMismatch(), misMatch); - convert(globaldata->getGapopen(), gapOpen); - convert(globaldata->getGapextend(), gapExtend); + + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + + //valid paramters for this command + string AlignArray[] = {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors","flip","threshold","outputdir","inputdir"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter("align.seqs"); + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + + it = parameters.find("template"); + + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["template"] = inputDir + it->second; } + } + } + + //check for required parameters + templateFileName = validParameter.validFile(parameters, "template", true); + + if (templateFileName == "not found") { + m->mothurOut("template is a required parameter for the align.seqs command."); + m->mothurOutEndLine(); + abort = true; + }else if (templateFileName == "not open") { abort = true; } + + candidateFileName = validParameter.validFile(parameters, "candidate", false); + if (candidateFileName == "not found") { m->mothurOut("candidate is a required parameter for the align.seqs command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(candidateFileName, candidateFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < candidateFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(candidateFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { candidateFileNames[i] = inputDir + candidateFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + ableToOpen = openInputFile(candidateFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + getSimpleName(candidateFileNames[i]); + m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = openInputFile(tryPath, in, "noerror"); + candidateFileNames[i] = tryPath; + } + } + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + candidateFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + candidateFileNames.erase(candidateFileNames.begin()+i); + i--; + } + + } + + //make sure there is at least one valid file left + if (candidateFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + string temp; + temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found"){ temp = "8"; } + convert(temp, kmerSize); + + temp = validParameter.validFile(parameters, "match", false); if (temp == "not found"){ temp = "1.0"; } + convert(temp, match); + + temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; } + convert(temp, misMatch); + + temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; } + convert(temp, gapOpen); + + temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; } + convert(temp, gapExtend); + + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } + convert(temp, processors); + + temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "f"; } + flip = isTrue(temp); + + temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "0.50"; } + convert(temp, threshold); + + search = validParameter.validFile(parameters, "search", false); if (search == "not found"){ search = "kmer"; } + + align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } + } + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function AlignCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "AlignCommand", "AlignCommand"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the AlignCommand class function AlignCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } //********************************************************************************************************************** -AlignCommand::~AlignCommand(){ - +AlignCommand::~AlignCommand(){ + + if (abort == false) { + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete templateDB; + delete alignment; + } +} + +//********************************************************************************************************************** + +void AlignCommand::help(){ + try { + m->mothurOut("The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n"); + m->mothurOut("The align.seqs command parameters are template, candidate, search, ksize, align, match, mismatch, gapopen and gapextend.\n"); + m->mothurOut("The template and candidate parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); + m->mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n"); + m->mothurOut("The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n"); + m->mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n"); + m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"); + m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"); + m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"); + m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"); + m->mothurOut("The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold. The default is false.\n"); + m->mothurOut("The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.\n"); + m->mothurOut("If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.\n"); + m->mothurOut("The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.\n"); + m->mothurOut("The align.seqs command should be in the following format: \n"); + m->mothurOut("align.seqs(template=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n"); + m->mothurOut("Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. candidate), '=' and parameters (i.e.yourFastaFile).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "help"); + exit(1); + } } + //********************************************************************************************************************** int AlignCommand::execute(){ try { - srand( (unsigned)time( NULL ) ); //needed to assign names to temporary files - - Database* templateDB; - if(globaldata->getSearch() == "kmer") { templateDB = new KmerDB(templateFileName, kmerSize); } - else if(globaldata->getSearch() == "suffix") { templateDB = new SuffixDB(templateFileName); } - else if(globaldata->getSearch() == "blast") { templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch); } - else { cout << globaldata->getSearch() << " is not a valid search option. I will run the command using suffix." << endl; - templateDB = new SuffixDB(templateFileName); } - - Alignment* alignment; - if(globaldata->getAlign() == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 3000); } - else if(globaldata->getAlign() == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000); } - else if(globaldata->getAlign() == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } - else if(globaldata->getAlign() == "noalign") { alignment = new NoAlign(); } - else { cout << globaldata->getAlign() << " is not a valid alignment option. I will run the command using blast." << endl; - alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } - - int numFastaSeqs=count(istreambuf_iterator(in),istreambuf_iterator(), '>'); - in.seekg(0); - - string candidateAligngmentFName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + globaldata->getSearch() + '.' + globaldata->getAlign() + ".nast.align"; - ofstream candidateAlignmentFile; - openOutputFile(candidateAligngmentFName, candidateAlignmentFile); + if (abort == true) { return 0; } - string candidateReportFName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + globaldata->getSearch() + '.' + globaldata->getAlign() + ".nast.report"; - NastReport report(candidateReportFName); + templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch); + int longestBase = templateDB->getLongestBase(); + + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } + else { + m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); + m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); + } + vector outputNames; + + for (int s = 0; s < candidateFileNames.size(); s++) { + if (m->control_pressed) { return 0; } + + m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine(); + + if (outputDir == "") { outputDir += hasPath(candidateFileNames[s]); } + string alignFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align"; + string reportFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align.report"; + string accnosFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "flip.accnos"; + bool hasAccnos = true; + + int numFastaSeqs = 0; + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + int start = time(NULL); + +#ifdef USE_MPI + int pid, end, numSeqsPerProcessor; + int tag = 2001; + vector MPIPos; + MPIWroteAccnos = false; + + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); - cout << "We are going to align the " << numFastaSeqs << " sequences in " << candidateFileName << "..." << endl; - cout.flush(); - - int start = time(NULL); + MPI_File inMPI; + MPI_File outMPIAlign; + MPI_File outMPIReport; + MPI_File outMPIAccnos; + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + int inMode=MPI_MODE_RDONLY; + + char outAlignFilename[1024]; + strcpy(outAlignFilename, alignFileName.c_str()); + + char outReportFilename[1024]; + strcpy(outReportFilename, reportFileName.c_str()); + + char outAccnosFilename[1024]; + strcpy(outAccnosFilename, accnosFileName.c_str()); + + char inFileName[1024]; + strcpy(inFileName, candidateFileNames[s].c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + MPI_File_open(MPI_COMM_WORLD, outAlignFilename, outMode, MPI_INFO_NULL, &outMPIAlign); + MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport); + MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } + + if (pid == 0) { //you are the root process + + MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } + + //figure out how many sequences you have to align + numSeqsPerProcessor = numFastaSeqs / processors; + int startIndex = pid * numSeqsPerProcessor; + if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } + + //align your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } - for(int i=0;icontrol_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } - Sequence* templateSeq = templateDB->findClosestSequence(candidateSeq); - report.setTemplate(templateSeq); - report.setSearchParameters(globaldata->getSearch(), templateDB->getSearchScore()); + MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); + } + + //close files + MPI_File_close(&inMPI); + MPI_File_close(&outMPIAlign); + MPI_File_close(&outMPIReport); + MPI_File_close(&outMPIAccnos); - Nast nast(alignment, candidateSeq, templateSeq); - report.setAlignmentParameters(globaldata->getAlign(), alignment); - report.setNastParameters(nast); + //delete accnos file if blank + if (pid == 0) { + //delete accnos file if its blank else report to user + if (MPIWroteAccnos) { + m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + }else { + //MPI_Info info; + //MPI_File_delete(outAccnosFilename, info); + hasAccnos = false; + remove(accnosFileName.c_str()); + } + } + +#else + vector positions = divideFile(candidateFileNames[s], processors); + + for (int i = 0; i < (positions.size()-1); i++) { + lines.push_back(new linePair(positions[i], positions[(i+1)])); + } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } + + //delete accnos file if its blank else report to user + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + else { + m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + } + }else{ + processIDS.resize(0); + + numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + + rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str()); + rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str()); + + //append alignment and report files + for(int i=1;i nonBlankAccnosFiles; + //delete blank accnos files generated with multiple processes + for(int i=0;imothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + }else{ hasAccnos = false; } + + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } + } + #else + numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + + if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; } + + //delete accnos file if its blank else report to user + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + else { + m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + } + + #endif + +#endif + - candidateAlignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl; - candidateAlignmentFile.flush(); + #ifdef USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif - if((i+1) % 100 == 0){ - cout << "It has taken " << time(NULL) - start << " secs to align " << i+1 << " sequences" << endl; + outputNames.push_back(alignFileName); + outputNames.push_back(reportFileName); + if (hasAccnos) { outputNames.push_back(accnosFileName); } + + #ifdef USE_MPI } - report.print(); + #endif + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences."); + m->mothurOutEndLine(); + m->mothurOutEndLine(); + } + + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** + +int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){ + try { + ofstream alignmentFile; + openOutputFile(alignFName, alignmentFile); + + ofstream accnosFile; + openOutputFile(accnosFName, accnosFile); + + NastReport report(reportFName); - delete candidateSeq; + ifstream inFASTA; + openInputFile(filename, inFASTA); + + inFASTA.seekg(filePos->start); + + bool done = false; + int count = 0; + + while (!done) { + + if (m->control_pressed) { return 0; } + + Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); + + int origNumBases = candidateSeq->getNumBases(); + string originalUnaligned = candidateSeq->getUnaligned(); + int numBasesNeeded = origNumBases * threshold; + + if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file + if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { + alignment->resize(candidateSeq->getUnaligned().length()+1); + } + + Sequence temp = templateDB->findClosestSequence(candidateSeq); + Sequence* templateSeq = &temp; + + float searchScore = templateDB->getSearchScore(); + + Nast* nast = new Nast(alignment, candidateSeq, templateSeq); + Sequence* copy; + + Nast* nast2; + bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below + //since nast does not make a copy of hte sequence passed, and it is used by the reporter below + //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place + //so this bool tells you if you need to delete it + + //if there is a possibility that this sequence should be reversed + if (candidateSeq->getNumBases() < numBasesNeeded) { + + string wasBetter = ""; + //if the user wants you to try the reverse + if (flip) { + //get reverse compliment + copy = new Sequence(candidateSeq->getName(), originalUnaligned); + copy->reverseComplement(); + + //rerun alignment + Sequence temp2 = templateDB->findClosestSequence(copy); + Sequence* templateSeq2 = &temp2; + + searchScore = templateDB->getSearchScore(); + + nast2 = new Nast(alignment, copy, templateSeq2); + + //check if any better + if (copy->getNumBases() > candidateSeq->getNumBases()) { + candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better + templateSeq = templateSeq2; + delete nast; + nast = nast2; + needToDeleteCopy = true; + wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement."; + }else{ + wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence."; + delete nast2; + delete copy; + } + } + + //create accnos file with names + accnosFile << candidateSeq->getName() << wasBetter << endl; + } + + report.setCandidate(candidateSeq); + report.setTemplate(templateSeq); + report.setSearchParameters(search, searchScore); + report.setAlignmentParameters(align, alignment); + report.setNastParameters(*nast); + + alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl; + + report.print(); + delete nast; + if (needToDeleteCopy) { delete copy; } + + count++; + } + delete candidateSeq; + + unsigned long int pos = inFASTA.tellg(); + if ((pos == -1) || (pos >= filePos->end)) { break; } + + //report progress + if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + } + //report progress + if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + + alignmentFile.close(); + inFASTA.close(); + accnosFile.close(); - cout << "It took " << time(NULL) - start << " secs to align " << numFastaSeqs << " sequences" << endl; - cout << endl; + return count; + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "driver"); + exit(1); + } +} +//********************************************************************************************************************** +#ifdef USE_MPI +int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector& MPIPos){ + try { + string outputString = ""; + MPI_Status statusReport; + MPI_Status statusAlign; + MPI_Status statusAccnos; + MPI_Status status; + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + NastReport report; + + if (pid == 0) { + outputString = report.getHeaders(); + int length = outputString.length(); + + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(reportFile, buf, length, MPI_CHAR, &statusReport); - delete templateDB; - delete alignment; + delete buf; + } + + for(int i=0;icontrol_pressed) { return 0; } + + //read next sequence + int length = MPIPos[start+i+1] - MPIPos[start+i]; + + char* buf4 = new char[length]; + //memcpy(buf4, outputString.c_str(), length); + + MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); + + string tempBuf = buf4; + + delete buf4; + + if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } + + istringstream iss (tempBuf,istringstream::in); + + Sequence* candidateSeq = new Sequence(iss); + + int origNumBases = candidateSeq->getNumBases(); + string originalUnaligned = candidateSeq->getUnaligned(); + int numBasesNeeded = origNumBases * threshold; + + if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file + if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { + alignment->resize(candidateSeq->getUnaligned().length()+1); + } + + Sequence temp = templateDB->findClosestSequence(candidateSeq); + Sequence* templateSeq = &temp; + + float searchScore = templateDB->getSearchScore(); + + Nast* nast = new Nast(alignment, candidateSeq, templateSeq); + Sequence* copy; + + Nast* nast2; + bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below + //since nast does not make a copy of hte sequence passed, and it is used by the reporter below + //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place + //so this bool tells you if you need to delete it + + //if there is a possibility that this sequence should be reversed + if (candidateSeq->getNumBases() < numBasesNeeded) { + + string wasBetter = ""; + //if the user wants you to try the reverse + if (flip) { + //get reverse compliment + copy = new Sequence(candidateSeq->getName(), originalUnaligned); + copy->reverseComplement(); + + //rerun alignment + Sequence temp2 = templateDB->findClosestSequence(copy); + Sequence* templateSeq2 = &temp2; + + searchScore = templateDB->getSearchScore(); + + nast2 = new Nast(alignment, copy, templateSeq2); + + //check if any better + if (copy->getNumBases() > candidateSeq->getNumBases()) { + candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better + templateSeq = templateSeq2; + delete nast; + nast = nast2; + needToDeleteCopy = true; + }else{ + wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; + delete nast2; + delete copy; + } + } + + //create accnos file with names + outputString = candidateSeq->getName() + wasBetter + "\n"; + + //send results to parent + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); - return 0; + MPI_File_write_shared(accnosFile, buf, length, MPI_CHAR, &statusAccnos); + delete buf; + MPIWroteAccnos = true; + } + + report.setCandidate(candidateSeq); + report.setTemplate(templateSeq); + report.setSearchParameters(search, searchScore); + report.setAlignmentParameters(align, alignment); + report.setNastParameters(*nast); + + outputString = ">" + candidateSeq->getName() + "\n" + candidateSeq->getAligned() + "\n"; + + //send results to parent + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + + MPI_File_write_shared(alignFile, buf2, length, MPI_CHAR, &statusAlign); + + delete buf2; + + outputString = report.getReport(); + + //send results to parent + length = outputString.length(); + char* buf3 = new char[length]; + memcpy(buf3, outputString.c_str(), length); + + MPI_File_write_shared(reportFile, buf3, length, MPI_CHAR, &statusReport); + + delete buf3; + delete nast; + if (needToDeleteCopy) { delete copy; } + } + delete candidateSeq; + + //report progress + if((i+1) % 100 == 0){ cout << (toString(i+1)) << endl; } + } + //report progress + if((num) % 100 != 0){ cout << (toString(num)) << endl; } + + return 1; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "AlignCommand", "driverMPI"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the AlignCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} +#endif +/**************************************************************************************************/ + +int AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename) { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 0; + int num = 0; + // processIDS.resize(0); + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + + //pass numSeqs to parent + ofstream out; + string tempFile = toString(getpid()) + ".temp"; + openOutputFile(tempFile, out); + out << num << endl; + out.close(); + + exit(0); + }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + } + + //force parent to wait until all the processes are done + for (int i=0;i> tempNum; num += tempNum; } + in.close(); remove(tempFile.c_str()); + } + + return num; +#endif + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "createProcesses"); exit(1); - } + } } +/**************************************************************************************************/ -//********************************************************************************************************************** \ No newline at end of file +void AlignCommand::appendAlignFiles(string temp, string filename) { + try{ + + ofstream output; + ifstream input; + openOutputFileAppend(filename, output); + openInputFile(temp, input); + + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } + } + + input.close(); + output.close(); + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "appendAlignFiles"); + exit(1); + } +} +//********************************************************************************************************************** + +void AlignCommand::appendReportFiles(string temp, string filename) { + try{ + + ofstream output; + ifstream input; + openOutputFileAppend(filename, output); + openInputFile(temp, input); + + while (!input.eof()) { char c = input.get(); if (c == 10 || c == 13){ break; } } // get header line + + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } + } + + input.close(); + output.close(); + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "appendReportFiles"); + exit(1); + } +} +//**********************************************************************************************************************