From 2bbb7273d4bf5209f098c764551c6e072d60df36 Mon Sep 17 00:00:00 2001 From: westcott Date: Sat, 17 Apr 2010 00:10:32 +0000 Subject: [PATCH] 1.9 --- aligncommand.cpp | 1749 +++++++++++++++++---------------- alignmentdb.cpp | 521 +++++----- bellerophon.cpp | 23 +- ccode.cpp | 28 +- chimera.cpp | 25 +- chimerabellerophoncommand.cpp | 12 +- chimeraccodecommand.cpp | 34 +- chimeracheckcommand.cpp | 16 +- chimeracheckrdp.cpp | 31 +- chimerapintailcommand.cpp | 21 +- chimeraslayer.cpp | 12 +- chimeraslayercommand.cpp | 33 +- classify.cpp | 529 +++++----- classifyseqscommand.cpp | 63 +- commandfactory.hpp | 90 +- distancecommand.cpp | 44 +- filterseqscommand.cpp | 34 +- pintail.cpp | 46 +- sequence.cpp | 34 - 19 files changed, 1708 insertions(+), 1637 deletions(-) diff --git a/aligncommand.cpp b/aligncommand.cpp index a4b3a79..38afa35 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -1,865 +1,884 @@ -/* - * aligncommand.cpp - * Mothur - * - * Created by Sarah Westcott on 5/15/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - * This version of nast does everything I think that the greengenes nast server does and then some. I have added the - * feature of allowing users to define their database, kmer size for searching, alignment penalty values and alignment - * method. This latter feature is perhaps most significant. nastPlus enables a user to use either a Needleman-Wunsch - * (non-affine gap penalty) or Gotoh (affine gap penalty) pairwise alignment algorithm. This is significant because it - * allows for a global alignment and not the local alignment provided by bLAst. Furthermore, it has the potential to - * provide a better alignment because of the banding method employed by blast (I'm not sure about this). - * - */ - -#include "aligncommand.h" -#include "sequence.hpp" - -#include "gotohoverlap.hpp" -#include "needlemanoverlap.hpp" -#include "blastalign.hpp" -#include "noalign.hpp" - -#include "nast.hpp" -#include "nastreport.hpp" - - -//********************************************************************************************************************** - -AlignCommand::AlignCommand(string option) { - try { - - abort = false; - - //allow user to run help - if(option == "help") { help(); abort = true; } - - else { - - //valid paramters for this command - string AlignArray[] = {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors","flip","threshold","outputdir","inputdir"}; - vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); - - OptionParser parser(option); - map parameters = parser.getParameters(); - - ValidParameters validParameter; - map::iterator it; - - //check to make sure all parameters are valid for command - for (it = parameters.begin(); it != parameters.end(); it++) { - if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } - } - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } - - - //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); - - if (inputDir == "not found"){ inputDir = ""; } - else { - string path; - - it = parameters.find("template"); - - //user has given a template file - if(it != parameters.end()){ - path = hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["template"] = inputDir + it->second; } - } - } - - //check for required parameters - templateFileName = validParameter.validFile(parameters, "template", true); - - if (templateFileName == "not found") { - m->mothurOut("template is a required parameter for the align.seqs command."); - m->mothurOutEndLine(); - abort = true; - }else if (templateFileName == "not open") { abort = true; } - - candidateFileName = validParameter.validFile(parameters, "candidate", false); - if (candidateFileName == "not found") { m->mothurOut("candidate is a required parameter for the align.seqs command."); m->mothurOutEndLine(); abort = true; } - else { - splitAtDash(candidateFileName, candidateFileNames); - - //go through files and make sure they are good, if not, then disregard them - for (int i = 0; i < candidateFileNames.size(); i++) { - if (inputDir != "") { - string path = hasPath(candidateFileNames[i]); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { candidateFileNames[i] = inputDir + candidateFileNames[i]; } - } - - int ableToOpen; - ifstream in; - - #ifdef USE_MPI - int pid; - MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - if (pid == 0) { - #endif - - ableToOpen = openInputFile(candidateFileNames[i], in); - in.close(); - - #ifdef USE_MPI - for (int j = 1; j < processors; j++) { - MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); - } - }else{ - MPI_Status status; - MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); - } - - #endif - - if (ableToOpen == 1) { - m->mothurOut(candidateFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); - //erase from file list - candidateFileNames.erase(candidateFileNames.begin()+i); - i--; - } - - } - - //make sure there is at least one valid file left - if (candidateFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } - } - - //check for optional parameter and set defaults - // ...at some point should added some additional type checking... - string temp; - temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found"){ temp = "8"; } - convert(temp, kmerSize); - - temp = validParameter.validFile(parameters, "match", false); if (temp == "not found"){ temp = "1.0"; } - convert(temp, match); - - temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; } - convert(temp, misMatch); - - temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; } - convert(temp, gapOpen); - - temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; } - convert(temp, gapExtend); - - temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } - convert(temp, processors); - - temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "f"; } - flip = isTrue(temp); - - temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "0.50"; } - convert(temp, threshold); - - search = validParameter.validFile(parameters, "search", false); if (search == "not found"){ search = "kmer"; } - - align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } - } - - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "AlignCommand"); - exit(1); - } -} - -//********************************************************************************************************************** - -AlignCommand::~AlignCommand(){ - - if (abort == false) { - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - delete templateDB; - delete alignment; - } -} - -//********************************************************************************************************************** - -void AlignCommand::help(){ - try { - m->mothurOut("The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n"); - m->mothurOut("The align.seqs command parameters are template, candidate, search, ksize, align, match, mismatch, gapopen and gapextend.\n"); - m->mothurOut("The template and candidate parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); - m->mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n"); - m->mothurOut("The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n"); - m->mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n"); - m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"); - m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"); - m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"); - m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"); - m->mothurOut("The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold. The default is false.\n"); - m->mothurOut("The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.\n"); - m->mothurOut("If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.\n"); - m->mothurOut("The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.\n"); - m->mothurOut("The align.seqs command should be in the following format: \n"); - m->mothurOut("align.seqs(template=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n"); - m->mothurOut("Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. candidate), '=' and parameters (i.e.yourFastaFile).\n\n"); - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "help"); - exit(1); - } -} - - -//********************************************************************************************************************** - -int AlignCommand::execute(){ - try { - if (abort == true) { return 0; } - - templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch); - int longestBase = templateDB->getLongestBase(); - - if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } - else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } - else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } - else if(align == "noalign") { alignment = new NoAlign(); } - else { - m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); - m->mothurOutEndLine(); - alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); - } - vector outputNames; - - for (int s = 0; s < candidateFileNames.size(); s++) { - if (m->control_pressed) { return 0; } - - m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine(); - - if (outputDir == "") { outputDir += hasPath(candidateFileNames[s]); } - string alignFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align"; - string reportFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align.report"; - string accnosFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "flip.accnos"; - bool hasAccnos = true; - - int numFastaSeqs = 0; - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - int start = time(NULL); - -#ifdef USE_MPI - int pid, end, numSeqsPerProcessor; - int tag = 2001; - vector MPIPos; - MPIWroteAccnos = false; - - MPI_Status status; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - MPI_File inMPI; - MPI_File outMPIAlign; - MPI_File outMPIReport; - MPI_File outMPIAccnos; - - int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - int inMode=MPI_MODE_RDONLY; - - char outAlignFilename[alignFileName.length()]; - strcpy(outAlignFilename, alignFileName.c_str()); - - char outReportFilename[reportFileName.length()]; - strcpy(outReportFilename, reportFileName.c_str()); - - char outAccnosFilename[accnosFileName.length()]; - strcpy(outAccnosFilename, accnosFileName.c_str()); - - char inFileName[candidateFileNames[s].length()]; - strcpy(inFileName, candidateFileNames[s].c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - MPI_File_open(MPI_COMM_WORLD, outAlignFilename, outMode, MPI_INFO_NULL, &outMPIAlign); - MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport); - MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } - - if (pid == 0) { //you are the root process - - MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs - - //send file positions to all processes - MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs - MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos - - //figure out how many sequences you have to align - numSeqsPerProcessor = numFastaSeqs / processors; - if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } - int startIndex = pid * numSeqsPerProcessor; - - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } - - for (int i = 1; i < processors; i++) { - bool tempResult; - MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); - if (tempResult != 0) { MPIWroteAccnos = true; } - } - }else{ //you are a child process - MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs - MPIPos.resize(numFastaSeqs+1); - MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions - - //figure out how many sequences you have to align - numSeqsPerProcessor = numFastaSeqs / processors; - if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } - int startIndex = pid * numSeqsPerProcessor; - - //align your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } - - MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); - } - - //close files - MPI_File_close(&inMPI); - MPI_File_close(&outMPIAlign); - MPI_File_close(&outMPIReport); - MPI_File_close(&outMPIAccnos); - - //delete accnos file if blank - if (pid == 0) { - //delete accnos file if its blank else report to user - if (MPIWroteAccnos) { - m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); - if (!flip) { - m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); - }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } - m->mothurOutEndLine(); - }else { - //MPI_Info info; - //MPI_File_delete(outAccnosFilename, info); - hasAccnos = false; - remove(accnosFileName.c_str()); - } - } - -#else - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); - inFASTA.close(); - - lines.push_back(new linePair(0, numFastaSeqs)); - - driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } - - //delete accnos file if its blank else report to user - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } - else { - m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); - if (!flip) { - m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); - }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } - m->mothurOutEndLine(); - } - } - else{ - vector positions; - processIDS.resize(0); - - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } - } - } - inFASTA.close(); - - numFastaSeqs = positions.size(); - - int numSeqsPerProcessor = numFastaSeqs / processors; - - for (int i = 0; i < processors; i++) { - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } - - createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - - rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str()); - rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str()); - - //append alignment and report files - for(int i=1;i nonBlankAccnosFiles; - //delete blank accnos files generated with multiple processes - for(int i=0;imothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); - if (!flip) { - m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); - }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } - m->mothurOutEndLine(); - }else{ hasAccnos = false; } - - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } - } - #else - ifstream inFASTA; - openInputFile(candidateFileNames[s], inFASTA); - numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); - inFASTA.close(); - - lines.push_back(new linePair(0, numFastaSeqs)); - - driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); - - if (m->control_pressed) { - remove(accnosFileName.c_str()); - remove(alignFileName.c_str()); - remove(reportFileName.c_str()); - return 0; - } - - //delete accnos file if its blank else report to user - if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } - else { - m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); - if (!flip) { - m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); - }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } - m->mothurOutEndLine(); - } - - #endif - -#endif - - - #ifdef USE_MPI - MPI_Comm_rank(MPI_COMM_WORLD, &pid); - - if (pid == 0) { //only one process should output to screen - #endif - - outputNames.push_back(alignFileName); - outputNames.push_back(reportFileName); - if (hasAccnos) { outputNames.push_back(accnosFileName); } - - #ifdef USE_MPI - } - #endif - - m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences."); - m->mothurOutEndLine(); - m->mothurOutEndLine(); - } - - - m->mothurOutEndLine(); - m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } - m->mothurOutEndLine(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "execute"); - exit(1); - } -} - -//********************************************************************************************************************** - -int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName, string filename){ - try { - ofstream alignmentFile; - openOutputFile(alignFName, alignmentFile); - - ofstream accnosFile; - openOutputFile(accnosFName, accnosFile); - - NastReport report(reportFName); - - ifstream inFASTA; - openInputFile(filename, inFASTA); - - inFASTA.seekg(line->start); - - for(int i=0;inumSeqs;i++){ - - if (m->control_pressed) { return 0; } - - Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); - - int origNumBases = candidateSeq->getNumBases(); - string originalUnaligned = candidateSeq->getUnaligned(); - int numBasesNeeded = origNumBases * threshold; - - if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file - if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { - alignment->resize(candidateSeq->getUnaligned().length()+1); - } - - Sequence temp = templateDB->findClosestSequence(candidateSeq); - Sequence* templateSeq = &temp; - - float searchScore = templateDB->getSearchScore(); - - Nast* nast = new Nast(alignment, candidateSeq, templateSeq); - Sequence* copy; - - Nast* nast2; - bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below - //since nast does not make a copy of hte sequence passed, and it is used by the reporter below - //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place - //so this bool tells you if you need to delete it - - //if there is a possibility that this sequence should be reversed - if (candidateSeq->getNumBases() < numBasesNeeded) { - - string wasBetter = ""; - //if the user wants you to try the reverse - if (flip) { - //get reverse compliment - copy = new Sequence(candidateSeq->getName(), originalUnaligned); - copy->reverseComplement(); - - //rerun alignment - Sequence temp2 = templateDB->findClosestSequence(copy); - Sequence* templateSeq2 = &temp2; - - searchScore = templateDB->getSearchScore(); - - nast2 = new Nast(alignment, copy, templateSeq2); - - //check if any better - if (copy->getNumBases() > candidateSeq->getNumBases()) { - candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better - templateSeq = templateSeq2; - delete nast; - nast = nast2; - needToDeleteCopy = true; - }else{ - wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; - delete nast2; - delete copy; - } - } - - //create accnos file with names - accnosFile << candidateSeq->getName() << wasBetter << endl; - } - - report.setCandidate(candidateSeq); - report.setTemplate(templateSeq); - report.setSearchParameters(search, searchScore); - report.setAlignmentParameters(align, alignment); - report.setNastParameters(*nast); - - alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl; - - report.print(); - delete nast; - if (needToDeleteCopy) { delete copy; } - } - delete candidateSeq; - - //report progress - if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } - } - //report progress - if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine(); } - - alignmentFile.close(); - inFASTA.close(); - accnosFile.close(); - - return 1; - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "driver"); - exit(1); - } -} -//********************************************************************************************************************** -#ifdef USE_MPI -int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector& MPIPos){ - try { - string outputString = ""; - MPI_Status statusReport; - MPI_Status statusAlign; - MPI_Status statusAccnos; - MPI_Status status; - int pid; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - NastReport report; - - if (pid == 0) { - outputString = report.getHeaders(); - int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); - - MPI_File_write_shared(reportFile, buf, length, MPI_CHAR, &statusReport); - } - - for(int i=0;icontrol_pressed) { return 0; } - - //read next sequence - int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; - MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); - - string tempBuf = buf4; - if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } - istringstream iss (tempBuf,istringstream::in); - - Sequence* candidateSeq = new Sequence(iss); - int origNumBases = candidateSeq->getNumBases(); - string originalUnaligned = candidateSeq->getUnaligned(); - int numBasesNeeded = origNumBases * threshold; - - if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file - if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { - alignment->resize(candidateSeq->getUnaligned().length()+1); - } - - Sequence temp = templateDB->findClosestSequence(candidateSeq); - Sequence* templateSeq = &temp; - - float searchScore = templateDB->getSearchScore(); - - Nast* nast = new Nast(alignment, candidateSeq, templateSeq); - Sequence* copy; - - Nast* nast2; - bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below - //since nast does not make a copy of hte sequence passed, and it is used by the reporter below - //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place - //so this bool tells you if you need to delete it - - //if there is a possibility that this sequence should be reversed - if (candidateSeq->getNumBases() < numBasesNeeded) { - - string wasBetter = ""; - //if the user wants you to try the reverse - if (flip) { - //get reverse compliment - copy = new Sequence(candidateSeq->getName(), originalUnaligned); - copy->reverseComplement(); - - //rerun alignment - Sequence temp2 = templateDB->findClosestSequence(copy); - Sequence* templateSeq2 = &temp2; - - searchScore = templateDB->getSearchScore(); - - nast2 = new Nast(alignment, copy, templateSeq2); - - //check if any better - if (copy->getNumBases() > candidateSeq->getNumBases()) { - candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better - templateSeq = templateSeq2; - delete nast; - nast = nast2; - needToDeleteCopy = true; - }else{ - wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; - delete nast2; - delete copy; - } - } - - //create accnos file with names - outputString = candidateSeq->getName() + wasBetter + "\n"; - - //send results to parent - int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); - - MPI_File_write_shared(accnosFile, buf, length, MPI_CHAR, &statusAccnos); - MPIWroteAccnos = true; - } - - report.setCandidate(candidateSeq); - report.setTemplate(templateSeq); - report.setSearchParameters(search, searchScore); - report.setAlignmentParameters(align, alignment); - report.setNastParameters(*nast); - - outputString = ">" + candidateSeq->getName() + "\n" + candidateSeq->getAligned() + "\n"; - - //send results to parent - int length = outputString.length(); - char buf2[length]; - strcpy(buf2, outputString.c_str()); - - MPI_File_write_shared(alignFile, buf2, length, MPI_CHAR, &statusAlign); - - outputString = report.getReport(); - - //send results to parent - length = outputString.length(); - char buf3[length]; - strcpy(buf3, outputString.c_str()); - - MPI_File_write_shared(reportFile, buf3, length, MPI_CHAR, &statusReport); - - delete nast; - if (needToDeleteCopy) { delete copy; } - } - delete candidateSeq; - - //report progress - if((i+1) % 100 == 0){ cout << (toString(i+1)) << endl; } - } - //report progress - if((num) % 100 != 0){ cout << (toString(num)) << endl; } - - return 1; - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "driverMPI"); - exit(1); - } -} -#endif -/**************************************************************************************************/ - -int AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename) { - try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; - int exitCommand = 1; - // processIDS.resize(0); - - //loop through and create all the processes you want - while (process != processors) { - int pid = fork(); - - if (pid > 0) { - processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later - process++; - }else if (pid == 0){ - exitCommand = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); - exit(0); - }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } - } - - //force parent to wait until all the processes are done - for (int i=0;ierrorOut(e, "AlignCommand", "createProcesses"); - exit(1); - } -} - -/**************************************************************************************************/ - -void AlignCommand::appendAlignFiles(string temp, string filename) { - try{ - - ofstream output; - ifstream input; - openOutputFileAppend(filename, output); - openInputFile(temp, input); - - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } - } - - input.close(); - output.close(); - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "appendAlignFiles"); - exit(1); - } -} -//********************************************************************************************************************** - -void AlignCommand::appendReportFiles(string temp, string filename) { - try{ - - ofstream output; - ifstream input; - openOutputFileAppend(filename, output); - openInputFile(temp, input); - - while (!input.eof()) { char c = input.get(); if (c == 10 || c == 13){ break; } } // get header line - - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } - } - - input.close(); - output.close(); - } - catch(exception& e) { - m->errorOut(e, "AlignCommand", "appendReportFiles"); - exit(1); - } -} -//********************************************************************************************************************** +/* + * aligncommand.cpp + * Mothur + * + * Created by Sarah Westcott on 5/15/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + * This version of nast does everything I think that the greengenes nast server does and then some. I have added the + * feature of allowing users to define their database, kmer size for searching, alignment penalty values and alignment + * method. This latter feature is perhaps most significant. nastPlus enables a user to use either a Needleman-Wunsch + * (non-affine gap penalty) or Gotoh (affine gap penalty) pairwise alignment algorithm. This is significant because it + * allows for a global alignment and not the local alignment provided by bLAst. Furthermore, it has the potential to + * provide a better alignment because of the banding method employed by blast (I'm not sure about this). + * + */ + +#include "aligncommand.h" +#include "sequence.hpp" + +#include "gotohoverlap.hpp" +#include "needlemanoverlap.hpp" +#include "blastalign.hpp" +#include "noalign.hpp" + +#include "nast.hpp" +#include "nastreport.hpp" + + +//********************************************************************************************************************** + +AlignCommand::AlignCommand(string option) { + try { + + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + + //valid paramters for this command + string AlignArray[] = {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors","flip","threshold","outputdir","inputdir"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + + it = parameters.find("template"); + + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["template"] = inputDir + it->second; } + } + } + + //check for required parameters + templateFileName = validParameter.validFile(parameters, "template", true); + + if (templateFileName == "not found") { + m->mothurOut("template is a required parameter for the align.seqs command."); + m->mothurOutEndLine(); + abort = true; + }else if (templateFileName == "not open") { abort = true; } + + candidateFileName = validParameter.validFile(parameters, "candidate", false); + if (candidateFileName == "not found") { m->mothurOut("candidate is a required parameter for the align.seqs command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(candidateFileName, candidateFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < candidateFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(candidateFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { candidateFileNames[i] = inputDir + candidateFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + #ifdef USE_MPI + int pid; + MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ableToOpen = openInputFile(candidateFileNames[i], in); + in.close(); + + #ifdef USE_MPI + for (int j = 1; j < processors; j++) { + MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); + } + }else{ + MPI_Status status; + MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + } + + #endif + + if (ableToOpen == 1) { + m->mothurOut(candidateFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + candidateFileNames.erase(candidateFileNames.begin()+i); + i--; + } + + } + + //make sure there is at least one valid file left + if (candidateFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + string temp; + temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found"){ temp = "8"; } + convert(temp, kmerSize); + + temp = validParameter.validFile(parameters, "match", false); if (temp == "not found"){ temp = "1.0"; } + convert(temp, match); + + temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; } + convert(temp, misMatch); + + temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; } + convert(temp, gapOpen); + + temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; } + convert(temp, gapExtend); + + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } + convert(temp, processors); + + temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "f"; } + flip = isTrue(temp); + + temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "0.50"; } + convert(temp, threshold); + + search = validParameter.validFile(parameters, "search", false); if (search == "not found"){ search = "kmer"; } + + align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } + } + + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "AlignCommand"); + exit(1); + } +} + +//********************************************************************************************************************** + +AlignCommand::~AlignCommand(){ + + if (abort == false) { + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + delete templateDB; + delete alignment; + } +} + +//********************************************************************************************************************** + +void AlignCommand::help(){ + try { + m->mothurOut("The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n"); + m->mothurOut("The align.seqs command parameters are template, candidate, search, ksize, align, match, mismatch, gapopen and gapextend.\n"); + m->mothurOut("The template and candidate parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); + m->mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n"); + m->mothurOut("The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n"); + m->mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n"); + m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"); + m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"); + m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"); + m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"); + m->mothurOut("The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold. The default is false.\n"); + m->mothurOut("The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.\n"); + m->mothurOut("If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.\n"); + m->mothurOut("The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.\n"); + m->mothurOut("The align.seqs command should be in the following format: \n"); + m->mothurOut("align.seqs(template=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n"); + m->mothurOut("Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. candidate), '=' and parameters (i.e.yourFastaFile).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "help"); + exit(1); + } +} + + +//********************************************************************************************************************** + +int AlignCommand::execute(){ + try { + if (abort == true) { return 0; } + + templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch); + int longestBase = templateDB->getLongestBase(); + + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } + else { + m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); + m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); + } + vector outputNames; + + for (int s = 0; s < candidateFileNames.size(); s++) { + if (m->control_pressed) { return 0; } + + m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine(); + + if (outputDir == "") { outputDir += hasPath(candidateFileNames[s]); } + string alignFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align"; + string reportFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align.report"; + string accnosFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "flip.accnos"; + bool hasAccnos = true; + + int numFastaSeqs = 0; + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + int start = time(NULL); + +#ifdef USE_MPI + int pid, end, numSeqsPerProcessor; + int tag = 2001; + vector MPIPos; + MPIWroteAccnos = false; + + MPI_Status status; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); + + MPI_File inMPI; + MPI_File outMPIAlign; + MPI_File outMPIReport; + MPI_File outMPIAccnos; + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + int inMode=MPI_MODE_RDONLY; + + char* outAlignFilename = new char[alignFileName.length()]; + memcpy(outAlignFilename, alignFileName.c_str(), alignFileName.length()); + + char* outReportFilename = new char[reportFileName.length()]; + memcpy(outReportFilename, reportFileName.c_str(), reportFileName.length()); + + char* outAccnosFilename = new char[accnosFileName.length()]; + memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); + + char* inFileName = new char[candidateFileNames[s].length()]; + memcpy(inFileName, candidateFileNames[s].c_str(), candidateFileNames[s].length()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + MPI_File_open(MPI_COMM_WORLD, outAlignFilename, outMode, MPI_INFO_NULL, &outMPIAlign); + MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport); + MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + + delete outAlignFilename; + delete inFileName; + delete outReportFilename; + delete outAccnosFilename; + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } + + if (pid == 0) { //you are the root process + + MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs + MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + + //figure out how many sequences you have to align + numSeqsPerProcessor = numFastaSeqs / processors; + if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } + int startIndex = pid * numSeqsPerProcessor; + + //align your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } + + for (int i = 1; i < processors; i++) { + bool tempResult; + MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); + if (tempResult != 0) { MPIWroteAccnos = true; } + } + }else{ //you are a child process + MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs + MPIPos.resize(numFastaSeqs+1); + MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + + //figure out how many sequences you have to align + numSeqsPerProcessor = numFastaSeqs / processors; + if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } + int startIndex = pid * numSeqsPerProcessor; + + //align your part + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); + + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); return 0; } + + MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); + } + + //close files + MPI_File_close(&inMPI); + MPI_File_close(&outMPIAlign); + MPI_File_close(&outMPIReport); + MPI_File_close(&outMPIAccnos); + + //delete accnos file if blank + if (pid == 0) { + //delete accnos file if its blank else report to user + if (MPIWroteAccnos) { + m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + }else { + //MPI_Info info; + //MPI_File_delete(outAccnosFilename, info); + hasAccnos = false; + remove(accnosFileName.c_str()); + } + } + +#else + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + ifstream inFASTA; + openInputFile(candidateFileNames[s], inFASTA); + numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); + + lines.push_back(new linePair(0, numFastaSeqs)); + + driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + + if (m->control_pressed) { + remove(accnosFileName.c_str()); + remove(alignFileName.c_str()); + remove(reportFileName.c_str()); + return 0; + } + + //delete accnos file if its blank else report to user + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + else { + m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + } + } + else{ + vector positions; + processIDS.resize(0); + + ifstream inFASTA; + openInputFile(candidateFileNames[s], inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + numFastaSeqs = positions.size(); + + int numSeqsPerProcessor = numFastaSeqs / processors; + + for (int i = 0; i < processors; i++) { + long int startPos = positions[ i * numSeqsPerProcessor ]; + if(i == processors - 1){ + numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + + rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str()); + rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str()); + + //append alignment and report files + for(int i=1;i nonBlankAccnosFiles; + //delete blank accnos files generated with multiple processes + for(int i=0;imothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + }else{ hasAccnos = false; } + + if (m->control_pressed) { + remove(accnosFileName.c_str()); + remove(alignFileName.c_str()); + remove(reportFileName.c_str()); + return 0; + } + } + #else + ifstream inFASTA; + openInputFile(candidateFileNames[s], inFASTA); + numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); + + lines.push_back(new linePair(0, numFastaSeqs)); + + driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); + + if (m->control_pressed) { + remove(accnosFileName.c_str()); + remove(alignFileName.c_str()); + remove(reportFileName.c_str()); + return 0; + } + + //delete accnos file if its blank else report to user + if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; } + else { + m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); + if (!flip) { + m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); + }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } + m->mothurOutEndLine(); + } + + #endif + +#endif + + + #ifdef USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif + + outputNames.push_back(alignFileName); + outputNames.push_back(reportFileName); + if (hasAccnos) { outputNames.push_back(accnosFileName); } + + #ifdef USE_MPI + } + #endif + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences."); + m->mothurOutEndLine(); + m->mothurOutEndLine(); + } + + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** + +int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName, string filename){ + try { + ofstream alignmentFile; + openOutputFile(alignFName, alignmentFile); + + ofstream accnosFile; + openOutputFile(accnosFName, accnosFile); + + NastReport report(reportFName); + + ifstream inFASTA; + openInputFile(filename, inFASTA); + + inFASTA.seekg(line->start); + + for(int i=0;inumSeqs;i++){ + + if (m->control_pressed) { return 0; } + + Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); + + int origNumBases = candidateSeq->getNumBases(); + string originalUnaligned = candidateSeq->getUnaligned(); + int numBasesNeeded = origNumBases * threshold; + + if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file + if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { + alignment->resize(candidateSeq->getUnaligned().length()+1); + } + + Sequence temp = templateDB->findClosestSequence(candidateSeq); + Sequence* templateSeq = &temp; + + float searchScore = templateDB->getSearchScore(); + + Nast* nast = new Nast(alignment, candidateSeq, templateSeq); + Sequence* copy; + + Nast* nast2; + bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below + //since nast does not make a copy of hte sequence passed, and it is used by the reporter below + //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place + //so this bool tells you if you need to delete it + + //if there is a possibility that this sequence should be reversed + if (candidateSeq->getNumBases() < numBasesNeeded) { + + string wasBetter = ""; + //if the user wants you to try the reverse + if (flip) { + //get reverse compliment + copy = new Sequence(candidateSeq->getName(), originalUnaligned); + copy->reverseComplement(); + + //rerun alignment + Sequence temp2 = templateDB->findClosestSequence(copy); + Sequence* templateSeq2 = &temp2; + + searchScore = templateDB->getSearchScore(); + + nast2 = new Nast(alignment, copy, templateSeq2); + + //check if any better + if (copy->getNumBases() > candidateSeq->getNumBases()) { + candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better + templateSeq = templateSeq2; + delete nast; + nast = nast2; + needToDeleteCopy = true; + }else{ + wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; + delete nast2; + delete copy; + } + } + + //create accnos file with names + accnosFile << candidateSeq->getName() << wasBetter << endl; + } + + report.setCandidate(candidateSeq); + report.setTemplate(templateSeq); + report.setSearchParameters(search, searchScore); + report.setAlignmentParameters(align, alignment); + report.setNastParameters(*nast); + + alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl; + + report.print(); + delete nast; + if (needToDeleteCopy) { delete copy; } + } + delete candidateSeq; + + //report progress + if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } + } + //report progress + if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine(); } + + alignmentFile.close(); + inFASTA.close(); + accnosFile.close(); + + return 1; + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "driver"); + exit(1); + } +} +//********************************************************************************************************************** +#ifdef USE_MPI +int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector& MPIPos){ + try { + string outputString = ""; + MPI_Status statusReport; + MPI_Status statusAlign; + MPI_Status statusAccnos; + MPI_Status status; + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + NastReport report; + + if (pid == 0) { + outputString = report.getHeaders(); + int length = outputString.length(); + + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(reportFile, buf, length, MPI_CHAR, &statusReport); + + delete buf; + } + + for(int i=0;icontrol_pressed) { return 0; } + + //read next sequence + int length = MPIPos[start+i+1] - MPIPos[start+i]; + + char* buf4 = new char[length]; + memcpy(buf4, outputString.c_str(), length); + + MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); + + string tempBuf = buf4; + + delete buf4; + + if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } + istringstream iss (tempBuf,istringstream::in); + + Sequence* candidateSeq = new Sequence(iss); + int origNumBases = candidateSeq->getNumBases(); + string originalUnaligned = candidateSeq->getUnaligned(); + int numBasesNeeded = origNumBases * threshold; + + if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file + if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { + alignment->resize(candidateSeq->getUnaligned().length()+1); + } + + Sequence temp = templateDB->findClosestSequence(candidateSeq); + Sequence* templateSeq = &temp; + + float searchScore = templateDB->getSearchScore(); + + Nast* nast = new Nast(alignment, candidateSeq, templateSeq); + Sequence* copy; + + Nast* nast2; + bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below + //since nast does not make a copy of hte sequence passed, and it is used by the reporter below + //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place + //so this bool tells you if you need to delete it + + //if there is a possibility that this sequence should be reversed + if (candidateSeq->getNumBases() < numBasesNeeded) { + + string wasBetter = ""; + //if the user wants you to try the reverse + if (flip) { + //get reverse compliment + copy = new Sequence(candidateSeq->getName(), originalUnaligned); + copy->reverseComplement(); + + //rerun alignment + Sequence temp2 = templateDB->findClosestSequence(copy); + Sequence* templateSeq2 = &temp2; + + searchScore = templateDB->getSearchScore(); + + nast2 = new Nast(alignment, copy, templateSeq2); + + //check if any better + if (copy->getNumBases() > candidateSeq->getNumBases()) { + candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better + templateSeq = templateSeq2; + delete nast; + nast = nast2; + needToDeleteCopy = true; + }else{ + wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; + delete nast2; + delete copy; + } + } + + //create accnos file with names + outputString = candidateSeq->getName() + wasBetter + "\n"; + + //send results to parent + int length = outputString.length(); + + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(accnosFile, buf, length, MPI_CHAR, &statusAccnos); + delete buf; + MPIWroteAccnos = true; + } + + report.setCandidate(candidateSeq); + report.setTemplate(templateSeq); + report.setSearchParameters(search, searchScore); + report.setAlignmentParameters(align, alignment); + report.setNastParameters(*nast); + + outputString = ">" + candidateSeq->getName() + "\n" + candidateSeq->getAligned() + "\n"; + + //send results to parent + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + + MPI_File_write_shared(alignFile, buf2, length, MPI_CHAR, &statusAlign); + + delete buf2; + + outputString = report.getReport(); + + //send results to parent + length = outputString.length(); + char* buf3 = new char[length]; + memcpy(buf3, outputString.c_str(), length); + + MPI_File_write_shared(reportFile, buf3, length, MPI_CHAR, &statusReport); + + delete buf3; + delete nast; + if (needToDeleteCopy) { delete copy; } + } + delete candidateSeq; + + //report progress + if((i+1) % 100 == 0){ cout << (toString(i+1)) << endl; } + } + //report progress + if((num) % 100 != 0){ cout << (toString(num)) << endl; } + + return 1; + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "driverMPI"); + exit(1); + } +} +#endif +/**************************************************************************************************/ + +int AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename) { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 0; + int exitCommand = 1; + // processIDS.resize(0); + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + exitCommand = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + exit(0); + }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + } + + //force parent to wait until all the processes are done + for (int i=0;ierrorOut(e, "AlignCommand", "createProcesses"); + exit(1); + } +} + +/**************************************************************************************************/ + +void AlignCommand::appendAlignFiles(string temp, string filename) { + try{ + + ofstream output; + ifstream input; + openOutputFileAppend(filename, output); + openInputFile(temp, input); + + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } + } + + input.close(); + output.close(); + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "appendAlignFiles"); + exit(1); + } +} +//********************************************************************************************************************** + +void AlignCommand::appendReportFiles(string temp, string filename) { + try{ + + ofstream output; + ifstream input; + openOutputFileAppend(filename, output); + openInputFile(temp, input); + + while (!input.eof()) { char c = input.get(); if (c == 10 || c == 13){ break; } } // get header line + + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } + } + + input.close(); + output.close(); + } + catch(exception& e) { + m->errorOut(e, "AlignCommand", "appendReportFiles"); + exit(1); + } +} +//********************************************************************************************************************** diff --git a/alignmentdb.cpp b/alignmentdb.cpp index 4b324b4..1ea1b98 100644 --- a/alignmentdb.cpp +++ b/alignmentdb.cpp @@ -1,259 +1,262 @@ -/* - * alignmentdb.cpp - * Mothur - * - * Created by westcott on 11/4/09. - * Copyright 2009 Schloss Lab. All rights reserved. - * - */ - -#include "alignmentdb.h" -#include "kmerdb.hpp" -#include "suffixdb.hpp" -#include "blastdb.hpp" - - -/**************************************************************************************************/ -AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may - try { // need to alter this in the future? - m = MothurOut::getInstance(); - longest = 0; - method = s; - bool needToGenerate = true; - - m->mothurOutEndLine(); - m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush(); - - #ifdef USE_MPI - int pid; - vector positions; - - MPI_Status status; - MPI_File inMPI; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - char inFileName[fastaFileName.length()]; - strcpy(inFileName, fastaFileName.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - - if (pid == 0) { - positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs - - //send file positions to all processes - MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs - MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos - }else{ - MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs - positions.resize(numSeqs+1); - MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions - } - - //read file - for(int i=0;icontrol_pressed) { templateSequences.clear(); break; } - - //read next sequence - int length = positions[i+1] - positions[i]; - char buf4[length]; - MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status); - - string tempBuf = buf4; - if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } - - istringstream iss (tempBuf,istringstream::in); - - Sequence temp(iss); - if (temp.getName() != "") { - templateSequences.push_back(temp); - //save longest base - if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } - } - } - - MPI_File_close(&inMPI); - #else - ifstream fastaFile; - openInputFile(fastaFileName, fastaFile); - - while (!fastaFile.eof()) { - Sequence temp(fastaFile); gobble(fastaFile); - - if (m->control_pressed) { templateSequences.clear(); break; } - - if (temp.getName() != "") { - templateSequences.push_back(temp); - //save longest base - if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } - } - } - fastaFile.close(); - - #endif - - numSeqs = templateSequences.size(); - //all of this is elsewhere already! - - m->mothurOut("DONE."); - m->mothurOutEndLine(); cout.flush(); - - //in case you delete the seqs and then ask for them - emptySequence = Sequence(); - emptySequence.setName("no_match"); - emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - - - string kmerDBName; - if(method == "kmer") { - search = new KmerDB(fastaFileName, kmerSize); - - #ifdef USE_MPI - #else - kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); - - if(kmerFileTest){ needToGenerate = false; } - #endif - } - else if(method == "suffix") { search = new SuffixDB(numSeqs); } - else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); } - else { - m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); - m->mothurOutEndLine(); - search = new KmerDB(fastaFileName, 8); - } - - if (!(m->control_pressed)) { - if (needToGenerate) { - //add sequences to search - for (int i = 0; i < templateSequences.size(); i++) { - search->addSequence(templateSequences[i]); - - if (m->control_pressed) { templateSequences.clear(); break; } - } - - if (m->control_pressed) { templateSequences.clear(); } - - search->generateDB(); - - }else if ((method == "kmer") && (!needToGenerate)) { - ifstream kmerFileTest(kmerDBName.c_str()); - search->readKmerDB(kmerFileTest); - } - - search->setNumSeqs(numSeqs); - } - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "AlignmentDB"); - exit(1); - } -} -/**************************************************************************************************/ -AlignmentDB::AlignmentDB(string s){ - try { - m = MothurOut::getInstance(); - method = s; - - if(method == "suffix") { search = new SuffixDB(); } - else if(method == "blast") { search = new BlastDB(); } - else { search = new KmerDB(); } - - - //in case you delete the seqs and then ask for them - emptySequence = Sequence(); - emptySequence.setName("no_match"); - emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "AlignmentDB"); - exit(1); - } -} -/**************************************************************************************************/ -AlignmentDB::~AlignmentDB() { delete search; } -/**************************************************************************************************/ -Sequence AlignmentDB::findClosestSequence(Sequence* seq) { - try{ - - vector spot = search->findClosestSequences(seq, 1); - - if (spot.size() != 0) { return templateSequences[spot[0]]; } - else { return emptySequence; } - - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "findClosestSequence"); - exit(1); - } -} -#ifdef USE_MPI -/**************************************************************************************************/ -int AlignmentDB::MPISend(int receiver) { - try { - - //send numSeqs - int - MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); - - //send longest - int - MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); - - //send templateSequences - for (int i = 0; i < templateSequences.size(); i++) { - templateSequences[i].MPISend(receiver); - } - - //send Database - search->MPISend(receiver); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "MPISend"); - exit(1); - } -} -/**************************************************************************************************/ -int AlignmentDB::MPIRecv(int sender) { - try { - MPI_Status status; - //receive numSeqs - int - MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status); - - //receive longest - int - MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status); - - //receive templateSequences - templateSequences.resize(numSeqs); - for (int i = 0; i < templateSequences.size(); i++) { - templateSequences[i].MPIRecv(sender); - } - - //receive Database - search->MPIRecv(sender); - - for (int i = 0; i < templateSequences.size(); i++) { - search->addSequence(templateSequences[i]); - } - search->generateDB(); - search->setNumSeqs(numSeqs); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "MPIRecv"); - exit(1); - } -} -#endif -/**************************************************************************************************/ - - - - - - +/* + * alignmentdb.cpp + * Mothur + * + * Created by westcott on 11/4/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "alignmentdb.h" +#include "kmerdb.hpp" +#include "suffixdb.hpp" +#include "blastdb.hpp" + + +/**************************************************************************************************/ +AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may + try { // need to alter this in the future? + m = MothurOut::getInstance(); + longest = 0; + method = s; + bool needToGenerate = true; + + m->mothurOutEndLine(); + m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush(); + + #ifdef USE_MPI + int pid; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + char inFileName[1024]; + strcpy(inFileName, fastaFileName.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + + if (pid == 0) { + positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs + MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + }else{ + MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs + positions.resize(numSeqs+1); + MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + } + + //read file + for(int i=0;icontrol_pressed) { templateSequences.clear(); break; } + + //read next sequence + int length = positions[i+1] - positions[i]; + char* buf4 = new char[length]; + + MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status); + + string tempBuf = buf4; + if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } + delete buf4; + + istringstream iss (tempBuf,istringstream::in); + + Sequence temp(iss); + if (temp.getName() != "") { + templateSequences.push_back(temp); + //save longest base + if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } + } + } + + MPI_File_close(&inMPI); + + #else + ifstream fastaFile; + openInputFile(fastaFileName, fastaFile); + + while (!fastaFile.eof()) { + Sequence temp(fastaFile); gobble(fastaFile); + + if (m->control_pressed) { templateSequences.clear(); break; } + + if (temp.getName() != "") { + templateSequences.push_back(temp); + //save longest base + if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } + } + } + fastaFile.close(); + + #endif + + numSeqs = templateSequences.size(); + //all of this is elsewhere already! + + m->mothurOut("DONE."); + m->mothurOutEndLine(); cout.flush(); + + //in case you delete the seqs and then ask for them + emptySequence = Sequence(); + emptySequence.setName("no_match"); + emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + + + string kmerDBName; + if(method == "kmer") { + search = new KmerDB(fastaFileName, kmerSize); + + #ifdef USE_MPI + #else + kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; + ifstream kmerFileTest(kmerDBName.c_str()); + + if(kmerFileTest){ needToGenerate = false; } + #endif + } + else if(method == "suffix") { search = new SuffixDB(numSeqs); } + else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); } + else { + m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); + m->mothurOutEndLine(); + search = new KmerDB(fastaFileName, 8); + } + + if (!(m->control_pressed)) { + if (needToGenerate) { + //add sequences to search + for (int i = 0; i < templateSequences.size(); i++) { + search->addSequence(templateSequences[i]); + + if (m->control_pressed) { templateSequences.clear(); break; } + } + + if (m->control_pressed) { templateSequences.clear(); } + + search->generateDB(); + + }else if ((method == "kmer") && (!needToGenerate)) { + ifstream kmerFileTest(kmerDBName.c_str()); + search->readKmerDB(kmerFileTest); + } + + search->setNumSeqs(numSeqs); + } + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "AlignmentDB"); + exit(1); + } +} +/**************************************************************************************************/ +AlignmentDB::AlignmentDB(string s){ + try { + m = MothurOut::getInstance(); + method = s; + + if(method == "suffix") { search = new SuffixDB(); } + else if(method == "blast") { search = new BlastDB(); } + else { search = new KmerDB(); } + + + //in case you delete the seqs and then ask for them + emptySequence = Sequence(); + emptySequence.setName("no_match"); + emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "AlignmentDB"); + exit(1); + } +} +/**************************************************************************************************/ +AlignmentDB::~AlignmentDB() { delete search; } +/**************************************************************************************************/ +Sequence AlignmentDB::findClosestSequence(Sequence* seq) { + try{ + + vector spot = search->findClosestSequences(seq, 1); + + if (spot.size() != 0) { return templateSequences[spot[0]]; } + else { return emptySequence; } + + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "findClosestSequence"); + exit(1); + } +} +#ifdef USE_MPI +/**************************************************************************************************/ +int AlignmentDB::MPISend(int receiver) { + try { + + //send numSeqs - int + MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); + + //send longest - int + MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); + + //send templateSequences + for (int i = 0; i < templateSequences.size(); i++) { + templateSequences[i].MPISend(receiver); + } + + //send Database + search->MPISend(receiver); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "MPISend"); + exit(1); + } +} +/**************************************************************************************************/ +int AlignmentDB::MPIRecv(int sender) { + try { + MPI_Status status; + //receive numSeqs - int + MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status); + + //receive longest - int + MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status); + + //receive templateSequences + templateSequences.resize(numSeqs); + for (int i = 0; i < templateSequences.size(); i++) { + templateSequences[i].MPIRecv(sender); + } + + //receive Database + search->MPIRecv(sender); + + for (int i = 0; i < templateSequences.size(); i++) { + search->addSequence(templateSequences[i]); + } + search->generateDB(); + search->setNumSeqs(numSeqs); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "MPIRecv"); + exit(1); + } +} +#endif +/**************************************************************************************************/ + + + + + + diff --git a/bellerophon.cpp b/bellerophon.cpp index 25c5de7..e33230e 100644 --- a/bellerophon.cpp +++ b/bellerophon.cpp @@ -158,11 +158,12 @@ int Bellerophon::print(MPI_File& out, MPI_File& outAcc) { MPI_Status status; int length = outString.length(); - char buf2[length]; - strcpy(buf2, outString.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); - + + delete buf2; //calc # of seqs with preference above 95%tile if (best[i].score >= cutoffScore) { @@ -172,11 +173,13 @@ int Bellerophon::print(MPI_File& out, MPI_File& outAcc) { MPI_Status statusAcc; length = outAccString.length(); - char buf[length]; - strcpy(buf, outAccString.c_str()); + char* buf = new char[length]; + memcpy(buf, outAccString.c_str(), length); MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); + delete buf; + cout << best[i].name << " is a suspected chimera at breakpoint " << toString(best[i].midpoint) << endl; cout << "It's score is " << toString(best[i].score) << " with suspected left parent " << best[i].leftParent << " and right parent " << best[i].rightParent << endl; } @@ -261,12 +264,13 @@ int Bellerophon::getChimeras() { int length; MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status); - char buf[length]; + char* buf = new char[length]; MPI_Recv(&buf, length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status); string temp = buf; if (temp.length() > length) { temp = temp.substr(0, length); } - + delete buf; + MPIBestSend.push_back(temp); } @@ -287,11 +291,12 @@ int Bellerophon::getChimeras() { if (m->control_pressed) { return 0; } int bestLength = MPIBestSend[i].length(); - char buf[bestLength]; - strcpy(buf, MPIBestSend[i].c_str()); + char* buf = new char[bestLength]; + memcpy(buf, MPIBestSend[i].c_str(), bestLength); MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD); + delete buf; } MPIBestSend.clear(); diff --git a/ccode.cpp b/ccode.cpp index 56856a9..5fdf5d1 100644 --- a/ccode.cpp +++ b/ccode.cpp @@ -29,13 +29,15 @@ Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int num #ifdef USE_MPI - char inFileName[mapInfo.length()]; - strcpy(inFileName, mapInfo.c_str()); + char* inFileName = new char[mapInfo.length()]; + memcpy(inFileName, mapInfo.c_str(), mapInfo.length()); int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap); //comm, filename, mode, info, filepointer + delete inFileName; + int pid; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -44,10 +46,11 @@ Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int num MPI_Status status; int length = outString.length(); - char buf2[length]; - strcpy(buf2, outString.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status); + delete buf2; } #else @@ -235,21 +238,23 @@ int Ccode::print(MPI_File& out, MPI_File& outAcc) { MPI_Status status; int length = outString.length(); - char buf2[length]; - strcpy(buf2, outString.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); - + delete buf2; + if (results) { m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine(); outAccString += querySeq->getName() + "\n"; MPI_Status statusAcc; length = outAccString.length(); - char buf[length]; - strcpy(buf, outAccString.c_str()); + char* buf = new char[length]; + memcpy(buf, outAccString.c_str(), length); MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); + delete buf; } //free memory @@ -267,10 +272,11 @@ int Ccode::printMapping(string& output) { try { MPI_Status status; int length = output.length(); - char buf[length]; - strcpy(buf, output.c_str()); + char* buf = new char[length]; + memcpy(buf, output.c_str(), length); MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status); + delete buf; } catch(exception& e) { diff --git a/chimera.cpp b/chimera.cpp index 692a4fe..32351f9 100644 --- a/chimera.cpp +++ b/chimera.cpp @@ -108,12 +108,13 @@ vector Chimera::readSeqs(string file) { MPI_Status status; MPI_File inMPI; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - char inFileName[file.length()]; - strcpy(inFileName, file.c_str()); + + char* inFileName = new char[file.length()]; + memcpy(inFileName, file.c_str(), file.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - + delete inFileName; + if (pid == 0) { positions = setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs @@ -133,12 +134,14 @@ vector Chimera::readSeqs(string file) { //read next sequence int seqlength = positions[i+1] - positions[i]; - char buf4[seqlength]; + char* buf4 = new char[seqlength]; + MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); } - + delete buf4; + istringstream iss (tempBuf,istringstream::in); Sequence* current = new Sequence(iss); @@ -196,18 +199,22 @@ void Chimera::setMask(string filename) { MPI_Offset size; MPI_Status status; - char inFileName[filename.length()]; - strcpy(inFileName, filename.c_str()); + char* inFileName = new char[filename.length()]; + memcpy(inFileName, filename.c_str(), filename.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_get_size(inMPI, &size); + + delete inFileName; - char buffer[size]; + char* buffer = new char[size]; MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); string tempBuf = buffer; if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } istringstream iss (tempBuf,istringstream::in); + + delete buffer; if (!iss.eof()) { Sequence temp(iss); diff --git a/chimerabellerophoncommand.cpp b/chimerabellerophoncommand.cpp index f36d7a0..69eb895 100644 --- a/chimerabellerophoncommand.cpp +++ b/chimerabellerophoncommand.cpp @@ -136,15 +136,19 @@ int ChimeraBellerophonCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - char outFilename[accnosFileName.length()]; - strcpy(outFilename, accnosFileName.c_str()); + char* outFilename = new char[accnosFileName.length()]; + memcpy(outFilename, accnosFileName.c_str(), accnosFileName.length()); + + char* FileName = new char[outputFileName.length()]; + memcpy(FileName, outputFileName.c_str(), outputFileName.length()); - char FileName[outputFileName.length()]; - strcpy(FileName, outputFileName.c_str()); MPI_File_open(MPI_COMM_WORLD, FileName, outMode, MPI_INFO_NULL, &outMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + delete FileName; + delete outFilename; + numSeqs = chimera->print(outMPI, outMPIAccnos); MPI_File_close(&outMPI); diff --git a/chimeraccodecommand.cpp b/chimeraccodecommand.cpp index a18fd4f..6958c00 100644 --- a/chimeraccodecommand.cpp +++ b/chimeraccodecommand.cpp @@ -190,20 +190,24 @@ int ChimeraCcodeCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - - char outFilename[outputFileName.length()]; - strcpy(outFilename, outputFileName.c_str()); - - char outAccnosFilename[accnosFileName.length()]; - strcpy(outAccnosFilename, accnosFileName.c_str()); + + char* outFilename = new char[outputFileName.length()]; + memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); - char inFileName[fastafile.length()]; - strcpy(inFileName, fastafile.c_str()); + char* outAccnosFilename = new char[accnosFileName.length()]; + memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); + + char* inFileName = new char[fastafile.length()]; + memcpy(inFileName, fastafile.c_str(), fastafile.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); - + + delete inFileName; + delete outFilename; + delete outAccnosFilename; + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } if (pid == 0) { //you are the root process @@ -211,10 +215,12 @@ int ChimeraCcodeCommand::execute(){ //print header int length = outTemp.length(); - char buf2[length]; - strcpy(buf2, outTemp.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outTemp.c_str(), length); + MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status); - + delete buf2; + MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs //send file positions to all processes @@ -493,12 +499,14 @@ int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; + char* buf4 = new char[length]; + MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); + delete buf4; Sequence* candidateSeq = new Sequence(iss); gobble(iss); diff --git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp index d80cce6..fe919de 100644 --- a/chimeracheckcommand.cpp +++ b/chimeracheckcommand.cpp @@ -166,16 +166,19 @@ int ChimeraCheckCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - - char outFilename[outputFileName.length()]; - strcpy(outFilename, outputFileName.c_str()); - char inFileName[fastafile.length()]; - strcpy(inFileName, fastafile.c_str()); + char* outFilename = new char[outputFileName.length()]; + memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); + + char* inFileName = new char[fastafile.length()]; + memcpy(inFileName, fastafile.c_str(), fastafile.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); + delete outFilename; + delete inFileName; + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); delete chimera; return 0; } if (pid == 0) { //you are the root process @@ -391,12 +394,13 @@ int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; + char* buf4 = new char[length]; MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); + delete buf4; Sequence* candidateSeq = new Sequence(iss); gobble(iss); diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp index 51a3d9b..cb101b6 100644 --- a/chimeracheckrdp.cpp +++ b/chimeracheckrdp.cpp @@ -97,11 +97,12 @@ int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) { MPI_Status status; int length = outString.length(); - char buf[length]; - strcpy(buf, outString.c_str()); + char* buf = new char[length]; + memcpy(buf, outString.c_str(), length); MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); - + delete buf; + if (svg) { if (name != "") { //if user has specific names map::iterator it = names.find(querySeq->getName()); @@ -261,19 +262,22 @@ void ChimeraCheckRDP::readName(string namefile) { MPI_File inMPI; MPI_Offset size; MPI_Status status; - - char inFileName[namefile.length()]; - strcpy(inFileName, namefile.c_str()); + + char* inFileName = new char[namefile.length()]; + memcpy(inFileName, namefile.c_str(), namefile.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); MPI_File_get_size(inMPI, &size); - char buffer[size]; + delete inFileName; + + char* buffer = new char[size]; MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); string tempBuf = buffer; if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } istringstream iss (tempBuf,istringstream::in); + delete buffer; while(!iss.eof()) { iss >> name; gobble(iss); @@ -347,12 +351,14 @@ void ChimeraCheckRDP::makeSVGpic(vector info) { MPI_File outSVG; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - - char FileName[file.length()]; - strcpy(FileName, file.c_str()); + + char* FileName = new char[file.length()]; + memcpy(FileName, file.c_str(), file.length()); MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG); //comm, filename, mode, info, filepointer + delete FileName; + int width = (info.size()*5) + 150; string outString = ""; @@ -398,10 +404,11 @@ void ChimeraCheckRDP::makeSVGpic(vector info) { MPI_Status status; int length = outString.length(); - char buf2[length]; - strcpy(buf2, outString.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status); + delete buf2; MPI_File_close(&outSVG); diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index f565fd3..40a1610 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -216,20 +216,24 @@ int ChimeraPintailCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - - char outFilename[outputFileName.length()]; - strcpy(outFilename, outputFileName.c_str()); - char outAccnosFilename[accnosFileName.length()]; - strcpy(outAccnosFilename, accnosFileName.c_str()); + char* outFilename = new char[outputFileName.length()]; + memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); - char inFileName[fastafile.length()]; - strcpy(inFileName, fastafile.c_str()); + char* outAccnosFilename = new char[accnosFileName.length()]; + memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); + + char* inFileName = new char[fastafile.length()]; + memcpy(inFileName, fastafile.c_str(), fastafile.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + delete inFileName; + delete outFilename; + delete outAccnosFilename; + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } if (pid == 0) { //you are the root process @@ -490,12 +494,13 @@ int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fi //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; + char* buf4 = new char[length]; MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); + delete buf4; Sequence* candidateSeq = new Sequence(iss); gobble(iss); diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index fa706ea..c30404f 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -221,10 +221,11 @@ int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { //write to accnos file int length = outAccString.length(); - char buf2[length]; - strcpy(buf2, outAccString.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outAccString.c_str(), length); MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); + delete buf2; } } @@ -235,10 +236,11 @@ int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { //write to output file int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); - + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; return results; } diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 1a5ae09..777584b 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -8,10 +8,6 @@ */ #include "chimeraslayercommand.h" -#include "bellerophon.h" -#include "pintail.h" -#include "ccode.h" -#include "chimeracheckrdp.h" #include "chimeraslayer.h" @@ -220,20 +216,24 @@ int ChimeraSlayerCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - - char outFilename[outputFileName.length()]; - strcpy(outFilename, outputFileName.c_str()); - char outAccnosFilename[accnosFileName.length()]; - strcpy(outAccnosFilename, accnosFileName.c_str()); + char* outFilename = new char[outputFileName.length()]; + memcpy(outFilename, outputFileName.c_str(), outputFileName.length()); - char inFileName[fastafile.length()]; - strcpy(inFileName, fastafile.c_str()); + char* outAccnosFilename = new char[accnosFileName.length()]; + memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length()); + + char* inFileName = new char[fastafile.length()]; + memcpy(inFileName, fastafile.c_str(), fastafile.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + delete inFileName; + delete outFilename; + delete outAccnosFilename; + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); delete chimera; return 0; } @@ -246,10 +246,12 @@ int ChimeraSlayerCommand::execute(){ //print header int length = outTemp.length(); - char buf2[length]; - strcpy(buf2, outTemp.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outTemp.c_str(), length); + MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status); - + delete buf2; + MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs //send file positions to all processes @@ -521,12 +523,13 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; + char* buf4 = new char[length]; MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); + delete buf4; Sequence* candidateSeq = new Sequence(iss); gobble(iss); diff --git a/classify.cpp b/classify.cpp index 557f17c..f09cbf5 100644 --- a/classify.cpp +++ b/classify.cpp @@ -1,263 +1,266 @@ -/* - * classify.cpp - * Mothur - * - * Created by westcott on 11/3/09. - * Copyright 2009 Schloss Lab. All rights reserved. - * - */ - -#include "classify.h" -#include "sequence.hpp" -#include "kmerdb.hpp" -#include "suffixdb.hpp" -#include "blastdb.hpp" -#include "distancedb.hpp" - -/**************************************************************************************************/ -Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) { - try { - m = MothurOut::getInstance(); - readTaxonomy(taxFile); - - int start = time(NULL); - int numSeqs = 0; - - m->mothurOut("Generating search database... "); cout.flush(); -#ifdef USE_MPI - int pid; - vector positions; - - MPI_Status status; - MPI_File inMPI; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - char inFileName[tempFile.length()]; - strcpy(inFileName, tempFile.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - - if (pid == 0) { //only one process needs to scan file - positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs - - //send file positions to all processes - MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs - MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos - }else{ - MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs - positions.resize(numSeqs); - MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions - } - - //create database - if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); } - else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); } - else if(method == "distance") { database = new DistanceDB(); } - else { - m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine(); - database = new KmerDB(tempFile, 8); - } - - //read file - for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } - - istringstream iss (tempBuf,istringstream::in); - - Sequence temp(iss); - if (temp.getName() != "") { - names.push_back(temp.getName()); - database->addSequence(temp); - } - } - - database->generateDB(); - MPI_File_close(&inMPI); - #else - - //need to know number of template seqs for suffixdb - if (method == "suffix") { - ifstream inFASTA; - openInputFile(tempFile, inFASTA); - numSeqs = count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); - inFASTA.close(); - } - - bool needToGenerate = true; - string kmerDBName; - if(method == "kmer") { - database = new KmerDB(tempFile, kmerSize); - - kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); - if(kmerFileTest){ needToGenerate = false; } - } - else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); } - else if(method == "distance") { database = new DistanceDB(); } - else { - m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); - m->mothurOutEndLine(); - database = new KmerDB(tempFile, 8); - } - - if (needToGenerate) { - ifstream fastaFile; - openInputFile(tempFile, fastaFile); - - while (!fastaFile.eof()) { - Sequence temp(fastaFile); - gobble(fastaFile); - - names.push_back(temp.getName()); - - database->addSequence(temp); - } - fastaFile.close(); - - database->generateDB(); - - }else if ((method == "kmer") && (!needToGenerate)) { - ifstream kmerFileTest(kmerDBName.c_str()); - database->readKmerDB(kmerFileTest); - - ifstream fastaFile; - openInputFile(tempFile, fastaFile); - - while (!fastaFile.eof()) { - Sequence temp(fastaFile); - gobble(fastaFile); - - names.push_back(temp.getName()); - } - fastaFile.close(); - } -#endif - database->setNumSeqs(names.size()); - - m->mothurOut("DONE."); m->mothurOutEndLine(); - m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); - - } - catch(exception& e) { - m->errorOut(e, "Classify", "Classify"); - exit(1); - } -} -/**************************************************************************************************/ - -void Classify::readTaxonomy(string file) { - try { - - phyloTree = new PhyloTree(); - string name, taxInfo; - - m->mothurOutEndLine(); - m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush(); - -#ifdef USE_MPI - int pid, num; - vector positions; - - MPI_Status status; - MPI_File inMPI; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - char inFileName[file.length()]; - strcpy(inFileName, file.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - - if (pid == 0) { - positions = setFilePosEachLine(file, num); - - //send file positions to all processes - MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs - MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos - }else{ - MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs - positions.resize(num); - MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions - } - - //read file - for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } - - istringstream iss (tempBuf,istringstream::in); - iss >> name >> taxInfo; - taxonomy[name] = taxInfo; - phyloTree->addSeqToTree(name, taxInfo); - } - - MPI_File_close(&inMPI); -#else - ifstream inTax; - openInputFile(file, inTax); - - //read template seqs and save - while (!inTax.eof()) { - inTax >> name >> taxInfo; - - taxonomy[name] = taxInfo; - - phyloTree->addSeqToTree(name, taxInfo); - - gobble(inTax); - } - inTax.close(); -#endif - - phyloTree->assignHeirarchyIDs(0); - - m->mothurOut("DONE."); - m->mothurOutEndLine(); cout.flush(); - - } - catch(exception& e) { - m->errorOut(e, "Classify", "readTaxonomy"); - exit(1); - } -} -/**************************************************************************************************/ - -vector Classify::parseTax(string tax) { - try { - vector taxons; - - tax = tax.substr(0, tax.length()-1); //get rid of last ';' - - //parse taxonomy - string individual; - while (tax.find_first_of(';') != -1) { - individual = tax.substr(0,tax.find_first_of(';')); - tax = tax.substr(tax.find_first_of(';')+1, tax.length()); - taxons.push_back(individual); - - } - //get last one - taxons.push_back(tax); - - return taxons; - } - catch(exception& e) { - m->errorOut(e, "Classify", "parseTax"); - exit(1); - } -} -/**************************************************************************************************/ - +/* + * classify.cpp + * Mothur + * + * Created by westcott on 11/3/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "classify.h" +#include "sequence.hpp" +#include "kmerdb.hpp" +#include "suffixdb.hpp" +#include "blastdb.hpp" +#include "distancedb.hpp" + +/**************************************************************************************************/ +Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) { + try { + m = MothurOut::getInstance(); + readTaxonomy(taxFile); + + int start = time(NULL); + int numSeqs = 0; + + m->mothurOut("Generating search database... "); cout.flush(); +#ifdef USE_MPI + int pid; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + char* inFileName = new char[tempFile.length()]; + memcpy(inFileName, tempFile.c_str(), tempFile.length()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + delete inFileName; + + if (pid == 0) { //only one process needs to scan file + positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs + MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + }else{ + MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs + positions.resize(numSeqs); + MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + } + + //create database + if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); } + else if(method == "suffix") { database = new SuffixDB(numSeqs); } + else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); } + else if(method == "distance") { database = new DistanceDB(); } + else { + m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine(); + database = new KmerDB(tempFile, 8); + } + + //read file + for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } + delete buf4; + istringstream iss (tempBuf,istringstream::in); + + Sequence temp(iss); + if (temp.getName() != "") { + names.push_back(temp.getName()); + database->addSequence(temp); + } + } + + database->generateDB(); + MPI_File_close(&inMPI); + #else + + //need to know number of template seqs for suffixdb + if (method == "suffix") { + ifstream inFASTA; + openInputFile(tempFile, inFASTA); + numSeqs = count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); + } + + bool needToGenerate = true; + string kmerDBName; + if(method == "kmer") { + database = new KmerDB(tempFile, kmerSize); + + kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; + ifstream kmerFileTest(kmerDBName.c_str()); + if(kmerFileTest){ needToGenerate = false; } + } + else if(method == "suffix") { database = new SuffixDB(numSeqs); } + else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); } + else if(method == "distance") { database = new DistanceDB(); } + else { + m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); + m->mothurOutEndLine(); + database = new KmerDB(tempFile, 8); + } + + if (needToGenerate) { + ifstream fastaFile; + openInputFile(tempFile, fastaFile); + + while (!fastaFile.eof()) { + Sequence temp(fastaFile); + gobble(fastaFile); + + names.push_back(temp.getName()); + + database->addSequence(temp); + } + fastaFile.close(); + + database->generateDB(); + + }else if ((method == "kmer") && (!needToGenerate)) { + ifstream kmerFileTest(kmerDBName.c_str()); + database->readKmerDB(kmerFileTest); + + ifstream fastaFile; + openInputFile(tempFile, fastaFile); + + while (!fastaFile.eof()) { + Sequence temp(fastaFile); + gobble(fastaFile); + + names.push_back(temp.getName()); + } + fastaFile.close(); + } +#endif + database->setNumSeqs(names.size()); + + m->mothurOut("DONE."); m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); + + } + catch(exception& e) { + m->errorOut(e, "Classify", "Classify"); + exit(1); + } +} +/**************************************************************************************************/ + +void Classify::readTaxonomy(string file) { + try { + + phyloTree = new PhyloTree(); + string name, taxInfo; + + m->mothurOutEndLine(); + m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush(); + +#ifdef USE_MPI + int pid, num; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + char* inFileName = new char[file.length()]; + memcpy(inFileName, file.c_str(), file.length()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + delete inFileName; + + if (pid == 0) { + positions = setFilePosEachLine(file, num); + + //send file positions to all processes + MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs + MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + }else{ + MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs + positions.resize(num); + MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + } + + //read file + for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } + delete buf4; + + istringstream iss (tempBuf,istringstream::in); + iss >> name >> taxInfo; + taxonomy[name] = taxInfo; + phyloTree->addSeqToTree(name, taxInfo); + } + + MPI_File_close(&inMPI); +#else + ifstream inTax; + openInputFile(file, inTax); + + //read template seqs and save + while (!inTax.eof()) { + inTax >> name >> taxInfo; + + taxonomy[name] = taxInfo; + + phyloTree->addSeqToTree(name, taxInfo); + + gobble(inTax); + } + inTax.close(); +#endif + + phyloTree->assignHeirarchyIDs(0); + + m->mothurOut("DONE."); + m->mothurOutEndLine(); cout.flush(); + + } + catch(exception& e) { + m->errorOut(e, "Classify", "readTaxonomy"); + exit(1); + } +} +/**************************************************************************************************/ + +vector Classify::parseTax(string tax) { + try { + vector taxons; + + tax = tax.substr(0, tax.length()-1); //get rid of last ';' + + //parse taxonomy + string individual; + while (tax.find_first_of(';') != -1) { + individual = tax.substr(0,tax.find_first_of(';')); + tax = tax.substr(tax.find_first_of(';')+1, tax.length()); + taxons.push_back(individual); + + } + //get last one + taxons.push_back(tax); + + return taxons; + } + catch(exception& e) { + m->errorOut(e, "Classify", "parseTax"); + exit(1); + } +} +/**************************************************************************************************/ + diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 26df57f..e4fcb2b 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -313,7 +313,6 @@ int ClassifySeqsCommand::execute(){ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; int tag = 2001; vector MPIPos; @@ -328,20 +327,24 @@ int ClassifySeqsCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - - char outNewTax[newTaxonomyFile.length()]; - strcpy(outNewTax, newTaxonomyFile.c_str()); - - char outTempTax[tempTaxonomyFile.length()]; - strcpy(outTempTax, tempTaxonomyFile.c_str()); - char inFileName[fastaFileNames[s].length()]; - strcpy(inFileName, fastaFileNames[s].c_str()); + char* outNewTax = new char[newTaxonomyFile.length()]; + memcpy(outNewTax, newTaxonomyFile.c_str(), newTaxonomyFile.length()); + + char* outTempTax = new char[tempTaxonomyFile.length()]; + memcpy(outTempTax, tempTaxonomyFile.c_str(), tempTaxonomyFile.length()); + + char* inFileName = new char[fastaFileNames[s].length()]; + memcpy(inFileName, fastaFileNames[s].c_str(), fastaFileNames[s].length()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax); MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax); + delete outNewTax; + delete outTempTax; + delete inFileName; + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; } if(namefile != "") { MPIReadNamesFile(namefileNames[s]); } @@ -393,7 +396,6 @@ int ClassifySeqsCommand::execute(){ MPI_File_close(&outMPITempTax); #else - //read namefile if(namefile != "") { nameMap.clear(); //remove old names @@ -471,9 +473,7 @@ int ClassifySeqsCommand::execute(){ driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); #endif #endif - - delete classify; - + #ifdef USE_MPI if (pid == 0) { //this part does not need to be paralellized #endif @@ -488,7 +488,6 @@ int ClassifySeqsCommand::execute(){ //read in users taxonomy file and add sequences to tree string name, taxon; - while(!in.eof()){ in >> name >> taxon; gobble(in); @@ -566,6 +565,7 @@ int ClassifySeqsCommand::execute(){ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); } + delete classify; return 0; } catch(exception& e) { @@ -681,13 +681,13 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa if (m->control_pressed) { return 0; } Sequence* candidateSeq = new Sequence(inFASTA); - + if (candidateSeq->getName() != "") { taxonomy = classify->getTaxonomy(candidateSeq); if (m->control_pressed) { delete candidateSeq; return 0; } - if ((taxonomy != "bad seq") && (taxonomy != "")) { + if (taxonomy != "bad seq") { //output confidence scores or not if (probs) { outTax << candidateSeq->getName() << '\t' << taxonomy << endl; @@ -696,7 +696,7 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa } outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl; - }else{ m->mothurOut("Sequence: " + candidateSeq->getName() + " is bad."); m->mothurOutEndLine(); } + } } delete candidateSeq; @@ -736,19 +736,20 @@ int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; + char* buf4 = new char[length]; MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); + delete buf4; Sequence* candidateSeq = new Sequence(iss); if (candidateSeq->getName() != "") { taxonomy = classify->getTaxonomy(candidateSeq); - if ((taxonomy != "bad seq") && (taxonomy != "")) { + if (taxonomy != "bad seq") { //output confidence scores or not if (probs) { outputString = candidateSeq->getName() + "\t" + taxonomy + "\n"; @@ -757,18 +758,20 @@ int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File } int length = outputString.length(); - char buf2[length]; - strcpy(buf2, outputString.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew); - + delete buf2; + outputString = candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n"; length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp); - }else{ cout << "Sequence: " << candidateSeq->getName() << " is bad." << endl; } + delete buf; + } } delete candidateSeq; @@ -795,19 +798,21 @@ int ClassifySeqsCommand::MPIReadNamesFile(string nameFilename){ MPI_File inMPI; MPI_Offset size; MPI_Status status; - - char inFileName[nameFilename.length()]; - strcpy(inFileName, nameFilename.c_str()); + + char* inFileName = new char[nameFilename.length()]; + memcpy(inFileName, nameFilename.c_str(), nameFilename.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); MPI_File_get_size(inMPI, &size); + delete inFileName; - char buffer[size]; + char* buffer = new char[size]; MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); string tempBuf = buffer; if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } istringstream iss (tempBuf,istringstream::in); + delete buffer; string firstCol, secondCol; while(!iss.eof()) { diff --git a/commandfactory.hpp b/commandfactory.hpp index 6f96e90..3b6a17e 100644 --- a/commandfactory.hpp +++ b/commandfactory.hpp @@ -1,45 +1,45 @@ -#ifndef COMMANDFACTORY_HPP -#define COMMANDFACTORY_HPP - -/* - * commandfactory.h - * - * - * Created by Pat Schloss on 10/25/08. - * Copyright 2008 Patrick D. Schloss. All rights reserved. - * - */ - -#include "mothur.h" -#include "mothurout.h"; - -class Command; - -class CommandFactory { -public: - static CommandFactory* getInstance(); - Command* getCommand(string, string); - Command* getCommand(); - bool isValidCommand(string); - void printCommands(ostream&); - void setOutputDirectory(string o) { outputDir = o; } - void setInputDirectory(string i) { inputDir = i; } - string getOutputDir() { return outputDir; } - bool MPIEnabled(string); - -private: - Command* command; - MothurOut* m; - map commands; - map::iterator it; - string outputDir, inputDir; - - static CommandFactory* _uniqueInstance; - CommandFactory( const CommandFactory& ); // Disable copy constructor - void operator=( const CommandFactory& ); // Disable assignment operator - CommandFactory(); - ~CommandFactory(); - -}; - -#endif +#ifndef COMMANDFACTORY_HPP +#define COMMANDFACTORY_HPP + +/* + * commandfactory.h + * + * + * Created by Pat Schloss on 10/25/08. + * Copyright 2008 Patrick D. Schloss. All rights reserved. + * + */ + +#include "mothur.h" +#include "mothurout.h" + +class Command; + +class CommandFactory { +public: + static CommandFactory* getInstance(); + Command* getCommand(string, string); + Command* getCommand(); + bool isValidCommand(string); + void printCommands(ostream&); + void setOutputDirectory(string o) { outputDir = o; } + void setInputDirectory(string i) { inputDir = i; } + string getOutputDir() { return outputDir; } + bool MPIEnabled(string); + +private: + Command* command; + MothurOut* m; + map commands; + map::iterator it; + string outputDir, inputDir; + + static CommandFactory* _uniqueInstance; + CommandFactory( const CommandFactory& ); // Disable copy constructor + void operator=( const CommandFactory& ); // Disable assignment operator + CommandFactory(); + ~CommandFactory(); + +}; + +#endif diff --git a/distancecommand.cpp b/distancecommand.cpp index 2cc92a7..58302dd 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -199,12 +199,13 @@ int DistanceCommand::execute(){ if (output != "lt") { MPI_File outMPI; int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - - char filename[outputFile.length()]; - strcpy(filename, outputFile.c_str()); + + char* filename = new char[outputFile.length()]; + memcpy(filename, outputFile.c_str(), outputFile.length()); MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI); - + delete filename; + if (pid == 0) { //you are the root process //do your part @@ -247,11 +248,12 @@ int DistanceCommand::execute(){ int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; // MPI_File outMPI; MPI_File inMPI; - - char filename[outputFile.length()]; - strcpy(filename, outputFile.c_str()); + + char* filename = new char[outputFile.length()]; + memcpy(filename, outputFile.c_str(), outputFile.length()); MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); + delete filename; //wait on chidren for(int b = 1; b < processors; b++) { @@ -262,11 +264,13 @@ int DistanceCommand::execute(){ MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); string outTemp = outputFile + toString(b) + ".temp"; - char buf[outTemp.length()]; - strcpy(buf, outTemp.c_str()); + + char* buf = new char[outTemp.length()]; + memcpy(buf, outTemp.c_str(), outTemp.length()); MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); - + delete buf; + int count = 0; while (count < fileSize) { //read 1000 characters at a time //send freqs @@ -483,11 +487,13 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo //send results to parent int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); + + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status); outputString = ""; + delete buf; } @@ -508,11 +514,12 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& si MPI_File outMPI; int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - - char filename[file.length()]; - strcpy(filename, file.c_str()); + + char* filename = new char[file.length()]; + memcpy(filename, file.c_str(), file.length()); MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); + delete filename; int startTime = time(NULL); @@ -550,14 +557,13 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& si //send results to parent int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); MPI_File_write(outMPI, buf, length, MPI_CHAR, &status); size += outputString.length(); outputString = ""; - - + delete buf; } //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index b992750..c94bf65 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -256,15 +256,18 @@ int FilterSeqsCommand::filterSequences() { int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - char outFilename[filteredFasta.length()]; - strcpy(outFilename, filteredFasta.c_str()); - - char inFileName[fastafileNames[s].length()]; - strcpy(inFileName, fastafileNames[s].c_str()); + char* outFilename = new char[filteredFasta.length()]; + memcpy(outFilename, filteredFasta.c_str(), filteredFasta.length()); + + char* inFileName = new char[fastafileNames[s].length()]; + memcpy(inFileName, fastafileNames[s].c_str(), fastafileNames[s].length()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); + delete inFileName; + delete outFilename; + if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; } if (pid == 0) { //you are the root process @@ -380,12 +383,13 @@ int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_Fil //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; + char* buf4 = new char[length]; MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); + delete buf4; Sequence seq(iss); gobble(iss); @@ -405,11 +409,12 @@ int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_Fil if(count % 10 == 0){ //output to file //send results to parent int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status); outputString = ""; + delete buf; } } @@ -420,11 +425,12 @@ int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_Fil if(outputString != ""){ //output to file //send results to parent int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status); outputString = ""; + delete buf; } if((num) % 100 != 0){ cout << (num) << endl; m->mothurOutJustToLog(toString(num) + "\n"); } @@ -692,11 +698,12 @@ string FilterSeqsCommand::createFilter() { MPI_Bcast(&filterString[0], alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD); }else{ //recieve filterString - char tempBuf[alignmentLength]; + char* tempBuf = new char[alignmentLength]; MPI_Bcast(tempBuf, alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD); filterString = tempBuf; if (filterString.length() > alignmentLength) { filterString = filterString.substr(0, alignmentLength); } + delete tempBuf; } MPI_Barrier(MPI_COMM_WORLD); @@ -764,13 +771,14 @@ int FilterSeqsCommand::MPICreateFilter(int start, int num, Filters& F, MPI_File& //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; - char buf4[length]; + char* buf4 = new char[length]; MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); - + delete buf4; + Sequence seq(iss); if (seq.getAligned().length() != alignmentLength) { cout << "Alignment length is " << alignmentLength << " and sequence " << seq.getName() << " has length " << seq.getAligned().length() << ", please correct." << endl; exit(1); } diff --git a/pintail.cpp b/pintail.cpp index 84c3219..1122c24 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -310,11 +310,12 @@ int Pintail::print(MPI_File& out, MPI_File& outAcc) { MPI_Status statusAcc; int length = outAccString.length(); - char buf[length]; - strcpy(buf, outAccString.c_str()); + char* buf = new char[length]; + memcpy(buf, outAccString.c_str(), length); MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); - + delete buf; + results = true; } outputString += "Observed\t"; @@ -329,10 +330,11 @@ int Pintail::print(MPI_File& out, MPI_File& outAcc) { MPI_Status status; int length = outputString.length(); - char buf2[length]; - strcpy(buf2, outputString.c_str()); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); + delete buf2; return results; } @@ -424,17 +426,19 @@ vector Pintail::readFreq() { MPI_File inMPI; MPI_Offset size; MPI_Status status; - - char inFileName[consfile.length()]; - strcpy(inFileName, consfile.c_str()); + + char* inFileName = new char[consfile.length()]; + memcpy(inFileName, consfile.c_str(), consfile.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); MPI_File_get_size(inMPI, &size); + delete inFileName; - char buffer[size]; + char* buffer = new char[size]; MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); string tempBuf = buffer; + delete buffer; if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } istringstream iss (tempBuf,istringstream::in); @@ -621,18 +625,21 @@ vector< vector > Pintail::readQuantiles() { MPI_Offset size; MPI_Status status; - char inFileName[quanfile.length()]; - strcpy(inFileName, quanfile.c_str()); + char* inFileName = new char[quanfile.length()]; + memcpy(inFileName, quanfile.c_str(), quanfile.length()); MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); MPI_File_get_size(inMPI, &size); + delete inFileName; + - char buffer[size]; + char* buffer = new char[size]; MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); string tempBuf = buffer; if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } istringstream iss (tempBuf,istringstream::in); + delete buffer; while(!iss.eof()) { iss >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; @@ -700,21 +707,24 @@ void Pintail::printQuanFile(string file, string outputString) { MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - - char FileName[file.length()]; - strcpy(FileName, file.c_str()); + + char* FileName = new char[file.length()]; + memcpy(FileName, file.c_str(), file.length()); if (pid == 0) { MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outQuan); //comm, filename, mode, info, filepointer int length = outputString.length(); - char buf[length]; - strcpy(buf, outputString.c_str()); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); MPI_File_write(outQuan, buf, length, MPI_CHAR, &status); - + delete buf; + MPI_File_close(&outQuan); } + + delete FileName; #else ofstream outQuan; openOutputFile(file, outQuan); diff --git a/sequence.cpp b/sequence.cpp index 19adf79..136e893 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -447,23 +447,7 @@ void Sequence::reverseComplement(){ //******************************************************************************************************************** int Sequence::MPISend(int receiver) { try { - //send name - string - int length = name.length(); - char buf[name.length()]; - strcpy(buf, name.c_str()); - MPI_Send(&length, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); - - MPI_Send(&buf, length, MPI_CHAR, receiver, 2001, MPI_COMM_WORLD); - - //send aligned - string - length = aligned.length(); - char buf2[aligned.length()]; - strcpy(buf2, aligned.c_str()); - - MPI_Send(&length, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); - - MPI_Send(&buf2, length, MPI_CHAR, receiver, 2001, MPI_COMM_WORLD); return 0; @@ -476,24 +460,6 @@ int Sequence::MPISend(int receiver) { /**************************************************************************************************/ int Sequence::MPIRecv(int sender) { try { - MPI_Status status; - - //receive name - string - int length; - MPI_Recv(&length, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status); - - char buf[length]; - MPI_Recv(&buf, length, MPI_CHAR, sender, 2001, MPI_COMM_WORLD, &status); - name = buf; - - //receive aligned - string - MPI_Recv(&length, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status); - - char buf2[length]; - MPI_Recv(&buf2, length, MPI_CHAR, sender, 2001, MPI_COMM_WORLD, &status); - aligned = buf2; - - setAligned(aligned); return 0; -- 2.39.2