]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayercommand.cpp
changed how we count sequences in a fastafile to allow for '>' in sequence names
[mothur.git] / chimeraslayercommand.cpp
index 4b309c0124a2038cb49db3980345e10e535ce84d..004cfb0c9e1124cd5ad78c542c6df3815bd4541d 100644 (file)
@@ -137,14 +137,13 @@ void ChimeraSlayerCommand::help(){
        
                m->mothurOut("The chimera.slayer command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
                m->mothurOut("This command was modeled after the chimeraSlayer written by the Broad Institute.\n");
-               m->mothurOut("The chimera.slayer command parameters are fasta, template, filter, mask, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
+               m->mothurOut("The chimera.slayer command parameters are fasta, template, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
                m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
                m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
                m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
                #ifdef USE_MPI
                m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
                #endif
-               m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
                m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=50. \n");
                m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n");
                m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n");
@@ -160,7 +159,6 @@ void ChimeraSlayerCommand::help(){
                m->mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
                m->mothurOut("The search parameter allows you to specify search method for finding the closest parent. Choices are distance, blast, and kmer, default distance. \n");
                //m->mothurOut("The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default false. Found to make results worse. \n");
-               m->mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                m->mothurOut("The chimera.slayer command should be in the following format: \n");
                m->mothurOut("chimera.slayer(fasta=yourFastaFile, template=yourTemplate, search=yourSearch) \n");
                m->mothurOut("Example: chimera.slayer(fasta=AD.align, template=core_set_aligned.imputed.fasta, search=kmer) \n");
@@ -217,22 +215,31 @@ int ChimeraSlayerCommand::execute(){
                        int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
                        int inMode=MPI_MODE_RDONLY; 
                        
-                       char* outFilename = new char[outputFileName.length()];
-                       memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
+                       //char* outFilename = new char[outputFileName.length()];
+                       //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
                        
-                       char* outAccnosFilename = new char[accnosFileName.length()];
-                       memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
+                       char outFilename[1024];
+                       strcpy(outFilename, outputFileName.c_str());
+                       
+                       //char* outAccnosFilename = new char[accnosFileName.length()];
+                       //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
+                       
+                       char outAccnosFilename[1024];
+                       strcpy(outAccnosFilename, accnosFileName.c_str());
 
-                       char* inFileName = new char[fastafile.length()];
-                       memcpy(inFileName, fastafile.c_str(), fastafile.length());
+                       //char* inFileName = new char[fastafile.length()];
+                       //memcpy(inFileName, fastafile.c_str(), fastafile.length());
+                       
+                       char inFileName[1024];
+                       strcpy(inFileName, fastafile.c_str());
 
                        MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
                        MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
                        MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                        
-                       delete inFileName;
-                       delete outFilename;
-                       delete outAccnosFilename;
+                       //delete inFileName;
+                       //delete outFilename;
+                       //delete outAccnosFilename;
 
                        if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
                
@@ -254,13 +261,16 @@ int ChimeraSlayerCommand::execute(){
                                MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
                                
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                        
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
@@ -273,14 +283,14 @@ int ChimeraSlayerCommand::execute(){
                                        if (tempResult != 0) { MPIWroteAccnos = true; }
                                }
                        }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
@@ -294,6 +304,7 @@ int ChimeraSlayerCommand::execute(){
                        MPI_File_close(&inMPI);
                        MPI_File_close(&outMPI);
                        MPI_File_close(&outMPIAccnos);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                        
                        //delete accnos file if blank
                        if (pid == 0) {
@@ -318,7 +329,7 @@ int ChimeraSlayerCommand::execute(){
                        if(processors == 1){
                                ifstream inFASTA;
                                openInputFile(fastafile, inFASTA);
-                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               getNumSeqs(inFASTA, numSeqs);
                                inFASTA.close();
                                
                                lines.push_back(new linePair(0, numSeqs));
@@ -365,7 +376,6 @@ int ChimeraSlayerCommand::execute(){
                                        lines.push_back(new linePair(startPos, numSeqsPerProcessor));
                                }
                                
-                               
                                createProcesses(outputFileName, fastafile, accnosFileName); 
                        
                                rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
@@ -407,7 +417,7 @@ int ChimeraSlayerCommand::execute(){
                #else
                        ifstream inFASTA;
                        openInputFile(fastafile, inFASTA);
-                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       getNumSeqs(inFASTA, numSeqs);
                        inFASTA.close();
                        lines.push_back(new linePair(0, numSeqs));