git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayercommand.cpp
Changed how we count sequences in a fasta file to allow for '>' in sequence names
[mothur.git] / chimeraslayercommand.cpp
index 336dba939f1fd114c6ace70f1ddc2501448df336..004cfb0c9e1124cd5ad78c542c6df3815bd4541d 100644 (file)
@@ -8,10 +8,6 @@
  */
 
 #include "chimeraslayercommand.h"
-#include "bellerophon.h"
-#include "pintail.h"
-#include "ccode.h"
-#include "chimeracheckrdp.h"
 #include "chimeraslayer.h"
 
 
@@ -141,14 +137,13 @@ void ChimeraSlayerCommand::help(){
        
                m->mothurOut("The chimera.slayer command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
                m->mothurOut("This command was modeled after the chimeraSlayer written by the Broad Institute.\n");
-               m->mothurOut("The chimera.slayer command parameters are fasta, template, filter, mask, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
+               m->mothurOut("The chimera.slayer command parameters are fasta, template, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
                m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
                m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
                m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
                #ifdef USE_MPI
                m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
                #endif
-               m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
                m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=50. \n");
                m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n");
                m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n");
@@ -164,7 +159,6 @@ void ChimeraSlayerCommand::help(){
                m->mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
                m->mothurOut("The search parameter allows you to specify search method for finding the closest parent. Choices are distance, blast, and kmer, default distance. \n");
                //m->mothurOut("The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default false. Found to make results worse. \n");
-               m->mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                m->mothurOut("The chimera.slayer command should be in the following format: \n");
                m->mothurOut("chimera.slayer(fasta=yourFastaFile, template=yourTemplate, search=yourSearch) \n");
                m->mothurOut("Example: chimera.slayer(fasta=AD.align, template=core_set_aligned.imputed.fasta, search=kmer) \n");
@@ -220,22 +214,34 @@ int ChimeraSlayerCommand::execute(){
                        
                        int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
                        int inMode=MPI_MODE_RDONLY; 
-                                                       
-                       char outFilename[outputFileName.length()];
+                       
+                       //char* outFilename = new char[outputFileName.length()];
+                       //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
+                       
+                       char outFilename[1024];
                        strcpy(outFilename, outputFileName.c_str());
                        
-                       char outAccnosFilename[accnosFileName.length()];
+                       //char* outAccnosFilename = new char[accnosFileName.length()];
+                       //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
+                       
+                       char outAccnosFilename[1024];
                        strcpy(outAccnosFilename, accnosFileName.c_str());
+
+                       //char* inFileName = new char[fastafile.length()];
+                       //memcpy(inFileName, fastafile.c_str(), fastafile.length());
                        
-                       char inFileName[fastafile.length()];
+                       char inFileName[1024];
                        strcpy(inFileName, fastafile.c_str());
 
                        MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
                        MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
                        MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                        
-                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+                       //delete inFileName;
+                       //delete outFilename;
+                       //delete outAccnosFilename;
 
+                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
                
                        if (pid == 0) { //you are the root process 
                                m->mothurOutEndLine();
@@ -246,20 +252,25 @@ int ChimeraSlayerCommand::execute(){
                                
                                //print header
                                int length = outTemp.length();
-                               char buf2[length];
-                               strcpy(buf2, outTemp.c_str()); 
+                               char* buf2 = new char[length];
+                               memcpy(buf2, outTemp.c_str(), length);
+
                                MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
-                               
+                               delete buf2;
+
                                MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
                                
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                        
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
@@ -272,14 +283,14 @@ int ChimeraSlayerCommand::execute(){
                                        if (tempResult != 0) { MPIWroteAccnos = true; }
                                }
                        }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
@@ -293,6 +304,7 @@ int ChimeraSlayerCommand::execute(){
                        MPI_File_close(&inMPI);
                        MPI_File_close(&outMPI);
                        MPI_File_close(&outMPIAccnos);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                        
                        //delete accnos file if blank
                        if (pid == 0) {
@@ -317,7 +329,7 @@ int ChimeraSlayerCommand::execute(){
                        if(processors == 1){
                                ifstream inFASTA;
                                openInputFile(fastafile, inFASTA);
-                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               getNumSeqs(inFASTA, numSeqs);
                                inFASTA.close();
                                
                                lines.push_back(new linePair(0, numSeqs));
@@ -364,7 +376,6 @@ int ChimeraSlayerCommand::execute(){
                                        lines.push_back(new linePair(startPos, numSeqsPerProcessor));
                                }
                                
-                               
                                createProcesses(outputFileName, fastafile, accnosFileName); 
                        
                                rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
@@ -405,8 +416,8 @@ int ChimeraSlayerCommand::execute(){
 
                #else
                        ifstream inFASTA;
-                       openInputFile(candidateFileNames[s], inFASTA);
-                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       openInputFile(fastafile, inFASTA);
+                       getNumSeqs(inFASTA, numSeqs);
                        inFASTA.close();
                        lines.push_back(new linePair(0, numSeqs));
                        
@@ -425,7 +436,7 @@ int ChimeraSlayerCommand::execute(){
                        if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
                #endif
                
-               appendFiles(tempHeader, outputFileName);
+               appendFiles(outputFileName, tempHeader);
        
                remove(outputFileName.c_str());
                rename(tempHeader.c_str(), outputFileName.c_str());
@@ -508,8 +519,7 @@ int ChimeraSlayerCommand::driver(linePair* line, string outputFName, string file
 //**********************************************************************************************************************
 #ifdef USE_MPI
 int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<long>& MPIPos){
-       try {
-                               
+       try {                           
                MPI_Status status; 
                int pid;
                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
@@ -520,29 +530,33 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil
                        
                        //read next sequence
                        int length = MPIPos[start+i+1] - MPIPos[start+i];
-       
-                       char buf4[length];
+
+                       char* buf4 = new char[length];
                        MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
-                       
+       
                        string tempBuf = buf4;
                        if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
                        istringstream iss (tempBuf,istringstream::in);
 
+                       delete buf4;
+
                        Sequence* candidateSeq = new Sequence(iss);  gobble(iss);
-                               
+               
                        if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
                                
                                if (candidateSeq->getAligned().length() != templateSeqsLength) {  
                                        m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
                                }else{
+               
                                        //find chimeras
                                        chimera->getChimeras(candidateSeq);
-                                       
+                       
                                        if (m->control_pressed) {       delete candidateSeq; return 1;  }
-               
+               //cout << "about to print" << endl;
                                        //print results
                                        bool isChimeric = chimera->print(outMPI, outAccMPI);
                                        if (isChimeric) { MPIWroteAccnos = true;  }
+       
                                }
                        }
                        delete candidateSeq;