]> git.donarmstrong.com Git - mothur.git/commitdiff
added MPI code, broke up chimera.seqs into 5 separated commands, added parse.sff...
authorwestcott <westcott>
Wed, 7 Apr 2010 12:55:09 +0000 (12:55 +0000)
committerwestcott <westcott>
Wed, 7 Apr 2010 12:55:09 +0000 (12:55 +0000)
62 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
aligncommand.h
alignmentdb.cpp
alignmentdb.h
bellerophon.cpp
bellerophon.h
blastdb.cpp
blastdb.hpp
ccode.cpp
ccode.h
chimera.cpp
chimera.h
chimerabellerophoncommand.cpp [new file with mode: 0644]
chimerabellerophoncommand.h [new file with mode: 0644]
chimeraccodecommand.cpp [new file with mode: 0644]
chimeraccodecommand.h [new file with mode: 0644]
chimeracheckcommand.cpp [new file with mode: 0644]
chimeracheckcommand.h [new file with mode: 0644]
chimeracheckrdp.cpp
chimeracheckrdp.h
chimerapintailcommand.cpp [new file with mode: 0644]
chimerapintailcommand.h [new file with mode: 0644]
chimeraseqscommand.cpp
chimeraseqscommand.h
chimeraslayer.cpp
chimeraslayer.h
chimeraslayercommand.cpp [new file with mode: 0644]
chimeraslayercommand.h [new file with mode: 0644]
classify.cpp
classifyseqscommand.cpp
classifyseqscommand.h
cluster.cpp
clustercommand.cpp
commandfactory.cpp
database.hpp
distancecommand.cpp
distancecommand.h
distancedb.hpp
filterseqscommand.cpp
filterseqscommand.h
fullmatrix.cpp
globaldata.hpp
kmerdb.cpp
kmerdb.hpp
mothur.h
nastreport.cpp
nastreport.hpp
parsesffcommand.cpp [new file with mode: 0644]
parsesffcommand.h [new file with mode: 0644]
pintail.cpp
pintail.h
readcolumn.cpp
readdistcommand.cpp
readdistcommand.h
readmatrix.hpp
readphylip.cpp
sequence.cpp
sequence.hpp
suffixdb.cpp
suffixdb.hpp
validparameter.cpp

index f07824e75ecdba89c37fddc31316861b23d958d8..24351a06ea744778b9b463d2ebeec5572b9b5dd2 100644 (file)
@@ -7,6 +7,16 @@
        objects = {
 
 /* Begin PBXFileReference section */
+               A747E79B1163442A00FB9042 /* chimeracheckcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckcommand.h; sourceTree = "<group>"; };
+               A747E79C1163442A00FB9042 /* chimeracheckcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckcommand.cpp; sourceTree = "<group>"; };
+               A747E81C116365E000FB9042 /* chimeraslayercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayercommand.h; sourceTree = "<group>"; };
+               A747E81D116365E000FB9042 /* chimeraslayercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayercommand.cpp; sourceTree = "<group>"; };
+               A78254461164D7790002E2DD /* chimerapintailcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerapintailcommand.h; sourceTree = "<group>"; };
+               A78254471164D7790002E2DD /* chimerapintailcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerapintailcommand.cpp; sourceTree = "<group>"; };
+               A7825502116519F70002E2DD /* chimerabellerophoncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerabellerophoncommand.h; sourceTree = "<group>"; };
+               A7825503116519F70002E2DD /* chimerabellerophoncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerabellerophoncommand.cpp; sourceTree = "<group>"; };
+               A78434881162224F00100BE0 /* chimeraccodecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraccodecommand.h; sourceTree = "<group>"; };
+               A78434891162224F00100BE0 /* chimeraccodecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraccodecommand.cpp; sourceTree = "<group>"; };
                A7DA1FEC113FECD400BF472F /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                A7DA1FED113FECD400BF472F /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
                A7DA1FEE113FECD400BF472F /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = "<group>"; };
                A7DA217A113FECD400BF472F /* weighted.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = weighted.h; sourceTree = "<group>"; };
                A7DA217B113FECD400BF472F /* whittaker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whittaker.cpp; sourceTree = "<group>"; };
                A7DA217C113FECD400BF472F /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = "<group>"; };
+               A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsesffcommand.cpp; sourceTree = "<group>"; };
+               A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsesffcommand.h; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXGroup section */
                                A7DA2008113FECD400BF472F /* bootstrapsharedcommand.h */,
                                A7DA2017113FECD400BF472F /* chimeraseqscommand.cpp */,
                                A7DA2018113FECD400BF472F /* chimeraseqscommand.h */,
+                               A7825502116519F70002E2DD /* chimerabellerophoncommand.h */,
+                               A7825503116519F70002E2DD /* chimerabellerophoncommand.cpp */,
+                               A747E79B1163442A00FB9042 /* chimeracheckcommand.h */,
+                               A747E79C1163442A00FB9042 /* chimeracheckcommand.cpp */,
+                               A78434881162224F00100BE0 /* chimeraccodecommand.h */,
+                               A78434891162224F00100BE0 /* chimeraccodecommand.cpp */,
+                               A78254461164D7790002E2DD /* chimerapintailcommand.h */,
+                               A78254471164D7790002E2DD /* chimerapintailcommand.cpp */,
+                               A747E81C116365E000FB9042 /* chimeraslayercommand.h */,
+                               A747E81D116365E000FB9042 /* chimeraslayercommand.cpp */,
                                A7DA201D113FECD400BF472F /* classifyseqscommand.cpp */,
                                A7DA201E113FECD400BF472F /* classifyseqscommand.h */,
                                A7DA2021113FECD400BF472F /* clustercommand.cpp */,
                                A7DA20B9113FECD400BF472F /* otuhierarchycommand.h */,
                                A7DA20BC113FECD400BF472F /* parselistscommand.cpp */,
                                A7DA20BD113FECD400BF472F /* parselistscommand.h */,
+                               A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */,
+                               A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */,
                                A7DA20C0113FECD400BF472F /* parsimonycommand.cpp */,
                                A7DA20C1113FECD400BF472F /* parsimonycommand.h */,
                                A7DA20C2113FECD400BF472F /* pcacommand.cpp */,
index 257587fa048ac3b494fa4f0ed4481f2cd505e513..a4b3a79012aa5ac0d53c6ff78f541864984b42ec 100644 (file)
@@ -32,7 +32,7 @@ AlignCommand::AlignCommand(string option)  {
        try {
                
                abort = false;
-               
+       
                //allow user to run help
                if(option == "help") { help(); abort = true; }
                
@@ -95,23 +95,45 @@ AlignCommand::AlignCommand(string option)  {
                                                //if the user has not given a path then, add inputdir. else leave path alone.
                                                if (path == "") {       candidateFileNames[i] = inputDir + candidateFileNames[i];               }
                                        }
-
+       
                                        int ableToOpen;
                                        ifstream in;
+                                       
+                                       #ifdef USE_MPI  
+                                               int pid;
+                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                                               if (pid == 0) {
+                                       #endif
+
                                        ableToOpen = openInputFile(candidateFileNames[i], in);
+                                       in.close();
+                                       
+                                       #ifdef USE_MPI  
+                                                       for (int j = 1; j < processors; j++) {
+                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
+                                                       }
+                                               }else{
+                                                       MPI_Status status;
+                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                               }
+                                               
+                                       #endif
+
                                        if (ableToOpen == 1) { 
                                                m->mothurOut(candidateFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
                                                //erase from file list
                                                candidateFileNames.erase(candidateFileNames.begin()+i);
                                                i--;
                                        }
-                                       in.close();
+                                       
                                }
                                
                                //make sure there is at least one valid file left
                                if (candidateFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
                        }
-                       
+               
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
                        string temp;
@@ -197,10 +219,10 @@ void AlignCommand::help(){
 int AlignCommand::execute(){
        try {
                if (abort == true) {    return 0;       }
-               
+
                templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch);
                int longestBase = templateDB->getLongestBase();
-       
+               
                if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
                else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
                else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
@@ -226,8 +248,111 @@ int AlignCommand::execute(){
                        int numFastaSeqs = 0;
                        for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                        int start = time(NULL);
+               
+#ifdef USE_MPI 
+                               int pid, end, numSeqsPerProcessor; 
+                               int tag = 2001;
+                               vector<long> MPIPos;
+                               MPIWroteAccnos = false;
+                               
+                               MPI_Status status; 
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                               MPI_File inMPI;
+                               MPI_File outMPIAlign;
+                               MPI_File outMPIReport;
+                               MPI_File outMPIAccnos;
+                               
+                               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                               int inMode=MPI_MODE_RDONLY; 
+                                                               
+                               char outAlignFilename[alignFileName.length()];
+                               strcpy(outAlignFilename, alignFileName.c_str());
+                               
+                               char outReportFilename[reportFileName.length()];
+                               strcpy(outReportFilename, reportFileName.c_str());
+                               
+                               char outAccnosFilename[accnosFileName.length()];
+                               strcpy(outAccnosFilename, accnosFileName.c_str());
+                               
+                               char inFileName[candidateFileNames[s].length()];
+                               strcpy(inFileName, candidateFileNames[s].c_str());
+
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               MPI_File_open(MPI_COMM_WORLD, outAlignFilename, outMode, MPI_INFO_NULL, &outMPIAlign);
+                               MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport);
+                               MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+                               
+                               if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); return 0; }
+                               
+                               if (pid == 0) { //you are the root process 
+                                       
+                                       MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
+                                       
+                                       //send file positions to all processes
+                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos   
+                                       
+                                       //figure out how many sequences you have to align
+                                       numSeqsPerProcessor = numFastaSeqs / processors;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                               
+                                       //align your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
+                                       
+                                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); return 0; }
+
+                                       for (int i = 1; i < processors; i++) {
+                                               bool tempResult;
+                                               MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                               if (tempResult != 0) { MPIWroteAccnos = true; }
+                                       }
+                               }else{ //you are a child process
+                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       MPIPos.resize(numFastaSeqs+1);
+                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                                       
+                                       //figure out how many sequences you have to align
+                                       numSeqsPerProcessor = numFastaSeqs / processors;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       
+                                       //align your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
+                                       
+                                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); return 0; }
+
+                                       MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                               }
+                               
+                               //close files 
+                               MPI_File_close(&inMPI);
+                               MPI_File_close(&outMPIAlign);
+                               MPI_File_close(&outMPIReport);
+                               MPI_File_close(&outMPIAccnos);
+                               
+                               //delete accnos file if blank
+                               if (pid == 0) {
+                                       //delete accnos file if its blank else report to user
+                                       if (MPIWroteAccnos) { 
+                                               m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+                                               if (!flip) {
+                                                       m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
+                                               }else{  m->mothurOut(" If the reverse compliment proved to be better it was reported.");  }
+                                               m->mothurOutEndLine();
+                                       }else { 
+                                               //MPI_Info info;
+                                               //MPI_File_delete(outAccnosFilename, info);
+                                               hasAccnos = false;      
+                                               remove(accnosFileName.c_str()); 
+                                       }
+                               }
+                               
+#else
                        
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
                                ifstream inFASTA;
                                openInputFile(candidateFileNames[s], inFASTA);
@@ -327,7 +452,7 @@ int AlignCommand::execute(){
                                        return 0; 
                                }
                        }
-#else
+       #else
                        ifstream inFASTA;
                        openInputFile(candidateFileNames[s], inFASTA);
                        numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
@@ -354,12 +479,25 @@ int AlignCommand::execute(){
                                m->mothurOutEndLine();
                        }
                        
-#endif
-                       
+       #endif
+
+#endif         
+
+
+               #ifdef USE_MPI
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                       if (pid == 0) { //only one process should output to screen
+               #endif
+
                        outputNames.push_back(alignFileName);
                        outputNames.push_back(reportFileName);
                        if (hasAccnos)  {       outputNames.push_back(accnosFileName);          }
-                                               
+                       
+               #ifdef USE_MPI
+                       }
+               #endif
+
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
                        m->mothurOutEndLine();
                        m->mothurOutEndLine();
@@ -395,12 +533,13 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName,
                openInputFile(filename, inFASTA);
 
                inFASTA.seekg(line->start);
-               
+       
                for(int i=0;i<line->numSeqs;i++){
                        
                        if (m->control_pressed) {  return 0; }
                        
                        Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
+       
                        int origNumBases = candidateSeq->getNumBases();
                        string originalUnaligned = candidateSeq->getUnaligned();
                        int numBasesNeeded = origNumBases * threshold;
@@ -491,7 +630,153 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName,
                exit(1);
        }
 }
+//**********************************************************************************************************************
+#ifdef USE_MPI
+int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector<long>& MPIPos){
+       try {
+               string outputString = "";
+               MPI_Status statusReport; 
+               MPI_Status statusAlign; 
+               MPI_Status statusAccnos; 
+               MPI_Status status; 
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+       
+               NastReport report;
+               
+               if (pid == 0) {
+                       outputString = report.getHeaders();
+                       int length = outputString.length();
+                       char buf[length];
+                       strcpy(buf, outputString.c_str()); 
+               
+                       MPI_File_write_shared(reportFile, buf, length, MPI_CHAR, &statusReport);
+               }
+               
+               for(int i=0;i<num;i++){
+               
+                       if (m->control_pressed) {  return 0; }
 
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+                       char buf4[length];
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
+                       
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+       
+                       Sequence* candidateSeq = new Sequence(iss);  
+                       int origNumBases = candidateSeq->getNumBases();
+                       string originalUnaligned = candidateSeq->getUnaligned();
+                       int numBasesNeeded = origNumBases * threshold;
+       
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(candidateSeq->getUnaligned().length()+1);
+                               }
+                                                               
+                               Sequence temp = templateDB->findClosestSequence(candidateSeq);
+                               Sequence* templateSeq = &temp;
+                               
+                               float searchScore = templateDB->getSearchScore();
+                                                               
+                               Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+                               Sequence* copy;
+                               
+                               Nast* nast2;
+                               bool needToDeleteCopy = false;  //this is needed in case you have you enter the ifs below
+                                                                                               //since nast does not make a copy of hte sequence passed, and it is used by the reporter below
+                                                                                               //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place
+                                                                                               //so this bool tells you if you need to delete it
+                                                                                               
+                               //if there is a possibility that this sequence should be reversed
+                               if (candidateSeq->getNumBases() < numBasesNeeded) {
+                                       
+                                       string wasBetter = "";
+                                       //if the user wants you to try the reverse
+                                       if (flip) {
+                                               //get reverse compliment
+                                               copy = new Sequence(candidateSeq->getName(), originalUnaligned);
+                                               copy->reverseComplement();
+                                               
+                                               //rerun alignment
+                                               Sequence temp2 = templateDB->findClosestSequence(copy);
+                                               Sequence* templateSeq2 = &temp2;
+                                               
+                                               searchScore = templateDB->getSearchScore();
+                                               
+                                               nast2 = new Nast(alignment, copy, templateSeq2);
+                       
+                                               //check if any better
+                                               if (copy->getNumBases() > candidateSeq->getNumBases()) {
+                                                       candidateSeq->setAligned(copy->getAligned());  //use reverse compliments alignment since its better
+                                                       templateSeq = templateSeq2; 
+                                                       delete nast;
+                                                       nast = nast2;
+                                                       needToDeleteCopy = true;
+                                               }else{  
+                                                       wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
+                                                       delete nast2;
+                                                       delete copy;    
+                                               }
+                                       }
+                                       
+                                       //create accnos file with names
+                                       outputString = candidateSeq->getName() + wasBetter + "\n";
+                                       
+                                       //send results to parent
+                                       int length = outputString.length();
+                                       char buf[length];
+                                       strcpy(buf, outputString.c_str()); 
+                               
+                                       MPI_File_write_shared(accnosFile, buf, length, MPI_CHAR, &statusAccnos);
+                                       MPIWroteAccnos = true;
+                               }
+                               
+                               report.setCandidate(candidateSeq);
+                               report.setTemplate(templateSeq);
+                               report.setSearchParameters(search, searchScore);
+                               report.setAlignmentParameters(align, alignment);
+                               report.setNastParameters(*nast);
+       
+                               outputString =  ">" + candidateSeq->getName() + "\n" + candidateSeq->getAligned() + "\n";
+                               
+                               //send results to parent
+                               int length = outputString.length();
+                               char buf2[length];
+                               strcpy(buf2, outputString.c_str()); 
+                               
+                               MPI_File_write_shared(alignFile, buf2, length, MPI_CHAR, &statusAlign);
+                               
+                               outputString = report.getReport();
+                               
+                               //send results to parent
+                               length = outputString.length();
+                               char buf3[length];
+                               strcpy(buf3, outputString.c_str()); 
+                               
+                               MPI_File_write_shared(reportFile, buf3, length, MPI_CHAR, &statusReport);
+
+                               delete nast;
+                               if (needToDeleteCopy) {   delete copy;   }
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   cout << (toString(i+1)) << endl;                }
+               }
+               //report progress
+               if((num) % 100 != 0){   cout << (toString(num)) << endl;                }
+               
+               return 1;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignCommand", "driverMPI");
+               exit(1);
+       }
+}
+#endif
 /**************************************************************************************************/
 
 int AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename) {
@@ -577,5 +862,4 @@ void AlignCommand::appendReportFiles(string temp, string filename) {
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
index f0496a54a9070b31dcb699a0deefc7cb51fc8a50..b100287269f16781839c884bd83121c620e279ba 100644 (file)
@@ -32,6 +32,7 @@ private:
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
+       bool MPIWroteAccnos;
        
        AlignmentDB* templateDB;
        Alignment* alignment;
@@ -41,6 +42,10 @@ private:
        void appendAlignFiles(string, string); 
        void appendReportFiles(string, string);
        
+       #ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<long>&);
+       #endif
+       
        string candidateFileName, templateFileName, distanceFileName, search, align, outputDir;
        float match, misMatch, gapOpen, gapExtend, threshold;
        int processors, kmerSize;
index 51fb17517f3598fa975c5349fcf9fe4af1dbec90..4b324b49c0f081356ab84a119e9657b05199dcb4 100644 (file)
 
 
 /**************************************************************************************************/
-AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){              //      This assumes that the template database is in fasta format, may 
+AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){           //      This assumes that the template database is in fasta format, may 
        try {                                                                                   //      need to alter this in the future?
                m = MothurOut::getInstance();
                longest = 0;
-
-               ifstream fastaFile;
-               openInputFile(fastaFileName, fastaFile);
+               method = s;
+               bool needToGenerate = true;
                
                m->mothurOutEndLine();
                m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
                
+               #ifdef USE_MPI  
+                       int pid;
+                       vector<long> positions;
+               
+                       MPI_Status status; 
+                       MPI_File inMPI;
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+       
+                       char inFileName[fastaFileName.length()];
+                       strcpy(inFileName, fastaFileName.c_str());
+       
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       
+                       if (pid == 0) {
+                               positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
+                       }else{
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               positions.resize(numSeqs+1);
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                       }
+                       
+                       //read file 
+                       for(int i=0;i<numSeqs;i++){
+                               
+                               if (m->control_pressed) {  templateSequences.clear(); break;  }
+                               
+                               //read next sequence
+                               int length = positions[i+1] - positions[i];
+                               char buf4[length];
+                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+                               
+                               string tempBuf = buf4;
+                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+                               
+                               istringstream iss (tempBuf,istringstream::in);
+               
+                               Sequence temp(iss);  
+                               if (temp.getName() != "") {
+                                       templateSequences.push_back(temp);
+                                       //save longest base
+                                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
+                               }
+                       }
+                       
+                       MPI_File_close(&inMPI);
+       #else
+               ifstream fastaFile;
+               openInputFile(fastaFileName, fastaFile);
+
                while (!fastaFile.eof()) {
                        Sequence temp(fastaFile);  gobble(fastaFile);
                        
@@ -36,10 +88,11 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa
                                if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
                        }
                }
+               fastaFile.close();
                
+       #endif
+       
                numSeqs = templateSequences.size();
-               
-               fastaFile.close();
                //all of this is elsewhere already!
                
                m->mothurOut("DONE.");
@@ -51,15 +104,18 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa
                emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
                emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
                
-               bool needToGenerate = true;
+               
                string kmerDBName;
                if(method == "kmer")                    {       
                        search = new KmerDB(fastaFileName, kmerSize);                   
                        
-                       kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
-                       ifstream kmerFileTest(kmerDBName.c_str());
+                       #ifdef USE_MPI
+                       #else
+                               kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+                               ifstream kmerFileTest(kmerDBName.c_str());
                        
-                       if(kmerFileTest){       needToGenerate = false;         }
+                               if(kmerFileTest){       needToGenerate = false;         }
+                       #endif
                }
                else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
                else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }
@@ -74,7 +130,12 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa
                                //add sequences to search 
                                for (int i = 0; i < templateSequences.size(); i++) {
                                        search->addSequence(templateSequences[i]);
+                                       
+                                       if (m->control_pressed) {  templateSequences.clear(); break;  }
                                }
+                               
+                               if (m->control_pressed) {  templateSequences.clear();  }
+                               
                                search->generateDB();
                                
                        }else if ((method == "kmer") && (!needToGenerate)) {
@@ -91,6 +152,29 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa
        }
 }
 /**************************************************************************************************/
+//Lightweight constructor: records the search method and creates an empty search
+//database of the matching kind ("suffix", "blast", anything else -> kmer).
+//No template sequences are read here; presumably they are filled in afterwards
+//through MPIRecv - confirm against the callers.
+AlignmentDB::AlignmentDB(string s){             
+       try {                                                                                   
+               m = MothurOut::getInstance();
+               method = s;
+               
+               //nothing has been loaded yet; give the counters defined values so
+               //getLongestBase() never returns an indeterminate int
+               numSeqs = 0;
+               longest = 0;
+               
+               if(method == "suffix")          {       search = new SuffixDB();        }
+               else if(method == "blast")      {       search = new BlastDB();         }
+               else                                            {       search = new KmerDB();          }
+
+                               
+               //in case you delete the seqs and then ask for them
+               emptySequence = Sequence();
+               emptySequence.setName("no_match");
+               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "AlignmentDB");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 AlignmentDB::~AlignmentDB() {  delete search;  }
 /**************************************************************************************************/
 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
@@ -107,7 +191,68 @@ Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
                exit(1);
        }
 }
+#ifdef USE_MPI 
 /**************************************************************************************************/
+//Ships this database to process `receiver`: first numSeqs and longest (one int
+//each, tag 2001), then every template sequence, then the search database.
+//Returns 0; any exception is reported through errorOut and ends the program.
+int AlignmentDB::MPISend(int receiver) {
+       try {
+               //header: sequence count followed by the longest-base value
+               MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
+               MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
+       
+               //each template sequence sends itself, in stored order
+               vector<Sequence>::iterator seqIt;
+               for (seqIt = templateSequences.begin(); seqIt != templateSequences.end(); ++seqIt) {
+                       seqIt->MPISend(receiver);
+               }
+               
+               //the search database goes last
+               search->MPISend(receiver);
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "MPISend");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//Counterpart of MPISend: reads numSeqs and longest (one int each, tag 2001)
+//from process `sender`, then numSeqs template sequences, then the search
+//database, and finally loads the templates into the search database.
+//Returns 0; any exception is reported through errorOut and ends the program.
+int AlignmentDB::MPIRecv(int sender) {
+       try {
+               MPI_Status recvStatus;
+               
+               //header: sequence count followed by the longest-base value
+               MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &recvStatus);
+               MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &recvStatus);
+
+               //make room for numSeqs templates, then let each one receive itself
+               templateSequences.resize(numSeqs);
+               vector<Sequence>::iterator seqIt;
+               for (seqIt = templateSequences.begin(); seqIt != templateSequences.end(); ++seqIt) {
+                       seqIt->MPIRecv(sender);
+               }
+
+               //receive Database
+               search->MPIRecv(sender);
+       
+               //hand the received templates to the search database and build it
+               for (seqIt = templateSequences.begin(); seqIt != templateSequences.end(); ++seqIt) {
+                       search->addSequence(*seqIt);
+               }
+               search->generateDB();
+               search->setNumSeqs(numSeqs);
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "MPIRecv");
+               exit(1);
+       }
+}
+#endif
+/**************************************************************************************************/
+
+
 
 
 
index d665f788764f6ea228ad70bb99867602820a16c5..a69b917388fcabb052ab453a4c9e8e93e55e3fb6 100644 (file)
@@ -21,15 +21,18 @@ class AlignmentDB {
 public:
 
        AlignmentDB(string, string, int, float, float, float, float);  //reads fastafile passed in and stores sequences
+       AlignmentDB(string);
        ~AlignmentDB();
        
        Sequence findClosestSequence(Sequence*);
        float getSearchScore()  {  return search->getSearchScore();  }
        int getLongestBase()    {  return longest;  }
+       int MPISend(int);
+       int MPIRecv(int);
        
 private:
        int numSeqs, longest;
-       float searchScore;
+       string method;
        
        Database* search;
        vector<Sequence> templateSequences;
index 54dfb9b67ecf5e764e3c159dffcc47d6fd7004dc..25c5de757f9de3061eb39326f4a0f5412f711327 100644 (file)
 #include "onegapdist.h"
 
 
-//***************************************************************************************************************
+/***************************************************************************************************************/
 
-Bellerophon::Bellerophon(string name, string o)  {
+Bellerophon::Bellerophon(string name, bool filterSeqs,  bool c, int win, int inc, int p, string o) : Chimera() {
        try {
                fastafile = name;
+               correction = c;
                outputDir = o;
+               window = win;
+               increment = inc;
+               processors = p;
+               
+               //read in sequences
+               seqs = readSeqs(fastafile);
+               numSeqs = seqs.size();
+               if (numSeqs == 0) { m->mothurOut("Error in reading you sequences."); m->mothurOutEndLine(); exit(1); }
+       
+               //do soft filter
+               if (filterSeqs)  {
+                       createFilter(seqs, 0.5);
+                       for (int i = 0; i < seqs.size(); i++) {  runFilter(seqs[i]);  }
+               }
+               
+               distCalculator = new eachGapDist();
+               
+               //set default window to 25% of sequence length
+               string seq0 = seqs[0]->getAligned();
+               if (window == 0) { window = seq0.length() / 4;  }
+               else if (window > (seq0.length() / 2)) {  
+                       m->mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); m->mothurOutEndLine();
+                       window = (seq0.length() / 2);
+               }
+               
+               if (increment > (seqs[0]->getAlignLength() - (2*window))) { 
+                       if (increment != 10) {
+                       
+                               m->mothurOut("You have selected a increment that is too large. I will use the default."); m->mothurOutEndLine();
+                               increment = 10;
+                               if (increment > (seqs[0]->getAlignLength() - (2*window))) {  increment = 0;  }
+                               
+                       }else{ increment = 0; }
+               }
+               
+               if (increment == 0) { iters = 1; }
+               else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); }
+               
+               //initialize pref
+               pref.resize(iters);
+               for (int i = 0; i < iters; i++) { 
+                       Preference temp;
+                       for (int j = 0; j < numSeqs; j++) {  
+                               pref[i].push_back(temp); 
+                       }
+               } 
+
        }
        catch(exception& e) {
                m->errorOut(e, "Bellerophon", "Bellerophon");
@@ -30,20 +78,26 @@ Bellerophon::Bellerophon(string name, string o)  {
 int Bellerophon::print(ostream& out, ostream& outAcc) {
        try {
                int above1 = 0;
+               
+               //sorted "best" preference scores for all seqs
+               vector<Preference> best = getBestPref();
+               
+               if (m->control_pressed) { return numSeqs; }
+               
                out << "Name\tScore\tLeft\tRight\t" << endl;
                //output prefenence structure to .chimeras file
-               for (int i = 0; i < pref.size(); i++) {
+               for (int i = 0; i < best.size(); i++) {
                        
-                       if (m->control_pressed) {  return 0; }
+                       if (m->control_pressed) {  return numSeqs; }
                        
-                       out << pref[i].name << '\t' << setprecision(3) << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
+                       out << best[i].name << '\t' << setprecision(3) << best[i].score << '\t' << best[i].leftParent << '\t' << best[i].rightParent << endl;
                        
                        //calc # of seqs with preference above 1.0
-                       if (pref[i].score[0] > 1.0) { 
+                       if (best[i].score > 1.0) { 
                                above1++; 
-                               outAcc << pref[i].name << endl;
-                               m->mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); m->mothurOutEndLine();
-                               m->mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); m->mothurOutEndLine();
+                               outAcc << best[i].name << endl;
+                               m->mothurOut(best[i].name + " is a suspected chimera at breakpoint " + toString(best[i].midpoint)); m->mothurOutEndLine();
+                               m->mothurOut("It's score is " + toString(best[i].score) + " with suspected left parent " + best[i].leftParent + " and right parent " + best[i].rightParent); m->mothurOutEndLine();
                        }
                }
                
@@ -51,22 +105,22 @@ int Bellerophon::print(ostream& out, ostream& outAcc) {
                m->mothurOutEndLine();
                m->mothurOut("Sequence with preference score above 1.0: " + toString(above1)); m->mothurOutEndLine();
                int spot;
-               spot = pref.size()-1;
-               m->mothurOut("Minimum:\t" + toString(pref[spot].score[0])); m->mothurOutEndLine();
-               spot = pref.size() * 0.975;
-               m->mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); m->mothurOutEndLine();
-               spot = pref.size() * 0.75;
-               m->mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); m->mothurOutEndLine();
-               spot = pref.size() * 0.50;
-               m->mothurOut("Median: \t" + toString(pref[spot].score[0])); m->mothurOutEndLine();
-               spot = pref.size() * 0.25;
-               m->mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); m->mothurOutEndLine();
-               spot = pref.size() * 0.025;
-               m->mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); m->mothurOutEndLine();
+               spot = best.size()-1;
+               m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+               spot = best.size() * 0.975;
+               m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+               spot = best.size() * 0.75;
+               m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+               spot = best.size() * 0.50;
+               m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
+               spot = best.size() * 0.25;
+               m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+               spot = best.size() * 0.025;
+               m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
                spot = 0;
-               m->mothurOut("Maximum:\t" + toString(pref[spot].score[0])); m->mothurOutEndLine();
+               m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
                
-               return 1;
+               return numSeqs;
 
        }
        catch(exception& e) {
@@ -74,191 +128,361 @@ int Bellerophon::print(ostream& out, ostream& outAcc) {
                exit(1);
        }
 }
-
-//********************************************************************************************************************
-//sorts highest score to lowest
-inline bool comparePref(Preference left, Preference right){
-       return (left.score[0] > right.score[0]);        
-}
-
+#ifdef USE_MPI
 //***************************************************************************************************************
-int Bellerophon::getChimeras() {
+//Writes text to an MPI shared-file-pointer file exactly as given (no trailing NUL).
+static void writeSharedText(MPI_File& file, const string& text) {
+       if (text.empty()) { return; }
+       
+       //MPI-2 declares the buffer parameter as non-const void*, so pass a private
+       //copy sized to the text instead of strcpy-ing into an array one byte too small
+       vector<char> buffer(text.begin(), text.end());
+       
+       MPI_Status status;
+       MPI_File_write_shared(file, &buffer[0], (int)buffer.size(), MPI_CHAR, &status);
+}
+//***************************************************************************************************************
+//MPI variant of print(): process 0 writes one row per sequence to `out`, the names
+//of sequences scoring at or above the cutoff to `outAcc`, and a score summary to
+//the screen. Every process returns numSeqs.
+int Bellerophon::print(MPI_File& out, MPI_File& outAcc) {
        try {
                
-               //do soft filter
-               if (filter)  {
-                       string optionString = "fasta=" + fastafile + ", soft=50";
-                       if (outputDir != "") { optionString += ", outputdir=" + outputDir; }
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               //only process 0 reports; the others fall through to the return
+               if (pid == 0) {
+                                               
+                       //sorted "best" preference scores for all seqs
+                       vector<Preference> best = getBestPref();
                        
-                       filterSeqs = new FilterSeqsCommand(optionString);
-                       filterSeqs->execute();
-                       delete filterSeqs;
+                       if (m->control_pressed) { return numSeqs; }
+                       
+                       //nothing to report, and every best[...] lookup below would be out of range
+                       if (best.empty()) { return numSeqs; }
+                       
+                       int above1 = 0;
+                       int ninetyfive = best.size() * 0.05;
+                       float cutoffScore = best[ninetyfive].score;
+
                        
-                       if (m->control_pressed) { return 0; }
+                       //the header is written once, ahead of the per-sequence rows
+                       writeSharedText(out, "Name\tScore\tLeft\tRight\n");
+                       
+                       //output preference structure to .chimeras file
+                       for (int i = 0; i < best.size(); i++) {
+                               
+                               if (m->control_pressed) {  return numSeqs; }
+                               
+                               //build only this row: writing an ever-growing string each pass
+                               //would repeat the header and all earlier rows in the file
+                               string row = best[i].name + "\t" +  toString(best[i].score) + "\t" + best[i].leftParent + "\t" + best[i].rightParent + "\n";
+                               writeSharedText(out, row);
+                               
+                               //calc # of seqs with preference above 95%tile
+                               if (best[i].score >= cutoffScore) { 
+                                       above1++; 
+                                       writeSharedText(outAcc, best[i].name + "\n");
+                                       
+                                       cout << best[i].name << " is a suspected chimera at breakpoint " << toString(best[i].midpoint) << endl;
+                                       cout << "It's score is " << toString(best[i].score) << " with suspected left parent " << best[i].leftParent << " and right parent " << best[i].rightParent << endl;
+                               }
+                       }
                        
-                       //reset fastafile to filtered file
-                       if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
-                       else                             { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
+                       //output results to screen
+                       m->mothurOutEndLine();
+                       m->mothurOut("Sequence with preference score above " + toString(cutoffScore) +  ": " + toString(above1)); m->mothurOutEndLine();
+                       int spot;
+                       spot = best.size()-1;
+                       m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+                       spot = best.size() * 0.975;
+                       m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+                       spot = best.size() * 0.75;
+                       m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+                       spot = best.size() * 0.50;
+                       m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
+                       spot = best.size() * 0.25;
+                       m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+                       spot = best.size() * 0.025;
+                       m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+                       spot = 0;
+                       m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
                        
                }
                
-               distCalculator = new eachGapDist();
+               return numSeqs;
                
-               //read in sequences
-               seqs = readSeqs(fastafile);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "print");
+               exit(1);
+       }
+}
+#endif
+//********************************************************************************************************************
+//ordering predicate for Preferences: true when left's score is strictly greater
+//than right's, so a sort using it runs from highest score to lowest
+inline bool comparePref(Preference left, Preference right){
+       return (right.score < left.score);      
+}
+//***************************************************************************************************************
+int Bellerophon::getChimeras() {
+       try {
                
-               if (m->control_pressed) { return 0; }
+               //create breaking points
+               vector<int> midpoints;   midpoints.resize(iters, window);
+               for (int i = 1; i < iters; i++) {  midpoints[i] = midpoints[i-1] + increment;  }
        
-               if (unaligned) { m->mothurOut("Your sequences need to be aligned when you use the bellerophon method."); m->mothurOutEndLine(); return 1;  }
-               
-               int numSeqs = seqs.size();
-               
-               if (numSeqs == 0) { m->mothurOut("Error in reading you sequences."); m->mothurOutEndLine(); exit(1); }
+       #ifdef USE_MPI
+               int pid, numSeqsPerProcessor; 
+       
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
                
-               //set default window to 25% of sequence length
-               string seq0 = seqs[0]->getAligned();
-               if (window == 0) { window = seq0.length() / 4;  }
-               else if (window > (seq0.length() / 2)) {  
-                       m->mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); m->mothurOutEndLine();
-                       window = (seq0.length() / 2);
-               }
+               numSeqsPerProcessor = iters / processors;
                
-               if (increment > (seqs[0]->getAlignLength() - (2*window))) { 
-                       if (increment != 10) {
-                       
-                               m->mothurOut("You have selected a increment that is too large. I will use the default."); m->mothurOutEndLine();
-                               increment = 10;
-                               if (increment > (seqs[0]->getAlignLength() - (2*window))) {  increment = 0;  }
-                               
-                       }else{ increment = 0; }
+               //each process hits this only once
+               int startPos = pid * numSeqsPerProcessor;
+               if(pid == processors - 1){
+                               numSeqsPerProcessor = iters - pid * numSeqsPerProcessor;
                }
+               lines.push_back(linePair(startPos, numSeqsPerProcessor));
                
-               if (increment == 0) { iters = 1; }
-               else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); }
+               //fill pref with scores
+               driverChimeras(midpoints, lines[0]);
                
-               //initialize pref
-               pref.resize(numSeqs);  
-               
-               for (int i = 0; i < numSeqs; i++ ) { 
-                       pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2);   pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
-                       pref[i].name = seqs[i]->getName();
-                       pref[i].score[0] = 0.0;  pref[i].score[1] = 0.0; 
-                       pref[i].closestLeft[0] = 100000.0;  pref[i].closestLeft[1] = 100000.0;  
-                       pref[i].closestRight[0] = 100000.0;  pref[i].closestRight[1] = 100000.0;  
-               }
-
-               int midpoint = window;
-               int count = 0;
-               while (count < iters) {
-                               
-                               if (m->control_pressed) { return 0; }
+               if (m->control_pressed) { return 0; }
                                
-                               //create 2 vectors of sequences, 1 for left side and one for right side
-                               vector<Sequence> left;  vector<Sequence> right;
+               //each process must send its parts back to pid 0
+               if (pid == 0) {
+                       
+                       //receive results 
+                       for (int j = 1; j < processors; j++) {
                                
-                               for (int i = 0; i < seqs.size(); i++) {
+                               vector<string>  MPIBestSend; 
+                               for (int i = 0; i < numSeqs; i++) {
                                
                                        if (m->control_pressed) { return 0; }
+
+                                       MPI_Status status;
+                                       //receive string
+                                       int length;
+                                       MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status);
                                        
-//cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
-//cout << "whole = " << seqs[i]->getAligned().length() << endl;
-                                       //save left side
-                                       string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
-                                       Sequence tempLeft;
-                                       tempLeft.setName(seqs[i]->getName());
-                                       tempLeft.setAligned(seqLeft);
-                                       left.push_back(tempLeft);
-//cout << "left = " << tempLeft.getAligned().length() << endl;                 
-                                       //save right side
-                                       string seqRight = seqs[i]->getAligned().substr(midpoint, window);
-                                       Sequence tempRight;
-                                       tempRight.setName(seqs[i]->getName());
-                                       tempRight.setAligned(seqRight);
-                                       right.push_back(tempRight);
-//cout << "right = " << seqRight.length() << endl;     
+                                       char buf[length];
+                                       MPI_Recv(&buf, length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status);
+                                       
+                                       string temp = buf;
+                                       if (temp.length() > length) { temp = temp.substr(0, length); }
+                                       
+                                       MPIBestSend.push_back(temp);
                                }
                                
-                               //adjust midpoint by increment
-                               midpoint += increment;
-                               
+                               fillPref(j, MPIBestSend);
                                
-                               //this should be parallelized
-                               //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
-                               //create a matrix containing the distance from left to left and right to right
-                               //calculate distances
-                               SparseMatrix* SparseLeft = new SparseMatrix();
-                               SparseMatrix* SparseRight = new SparseMatrix();
-                               
-                               createSparseMatrix(0, left.size(), SparseLeft, left);
+                               if (m->control_pressed) { return 0; }
+                       }
+
+               }else {
+                       //takes best window for each sequence and turns Preference to string that can be parsed by pid 0.
+                       //played with this a bit, but it may be better to try user-defined datatypes with set string lengths??
+                       vector<string> MPIBestSend = getBestWindow(lines[0]);
+                       pref.clear();
+                       
+                       //send your result to parent
+                       for (int i = 0; i < numSeqs; i++) {
                                
-                               if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+                               if (m->control_pressed) { return 0; }
                                
-                               createSparseMatrix(0, right.size(), SparseRight, right);
+                               int bestLength = MPIBestSend[i].length();
+                               char buf[bestLength];
+                               strcpy(buf, MPIBestSend[i].c_str()); 
                                
-                               if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+                               MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                               MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD);
+                       }
+                       
+                       MPIBestSend.clear();
+               }
+               
+       #else
+       
+               //divide breakpoints between processors
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){ 
+                               lines.push_back(linePair(0, iters));    
                                
-                               left.clear(); right.clear();
-                               vector<SeqMap> distMapRight;
-                               vector<SeqMap> distMapLeft;
+                               //fill pref with scores
+                               driverChimeras(midpoints, lines[0]);
+       
+                       }else{
+                       
+                               int numSeqsPerProcessor = iters / processors;
                                
-                               // Create a data structure to quickly access the distance information.
-                               //this is from thallingers reimplementation on get.oturep
-                               // It consists of a vector of distance maps, where each map contains
-                               // all distances of a certain sequence. Vector and maps are accessed
-                               // via the index of a sequence in the distance matrix
-                               distMapRight = vector<SeqMap>(numSeqs); 
-                               distMapLeft = vector<SeqMap>(numSeqs); 
-                               //cout << "left" << endl << endl;
-                               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
-                                       distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
-                                       if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
-                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
-                               }
-                               //cout << "right" << endl << endl;
-                               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
-                                       distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
-                                       if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
-                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                               for (int i = 0; i < processors; i++) {
+                                       int startPos = i * numSeqsPerProcessor;
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = iters - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(linePair(startPos, numSeqsPerProcessor));
                                }
                                
-                               delete SparseLeft;
-                               delete SparseRight;
-                               
-                               //fill preference structure
-                               generatePreferences(distMapLeft, distMapRight, midpoint);
-                               
-                               count++;
-                               
-               }
-               
-               delete distCalculator;
-               
-               //rank preference score to eachother
-               float dme = 0.0;
-               float expectedPercent = 1 / (float) (pref.size());
-               
-               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[0];  }
+                               createProcesses(midpoints);
+                       }
+               #else
+                       lines.push_back(linePair(0, iters));    
+                       
+                       ///fill pref with scores
+                       driverChimeras(midpoints, lines[0]);
+               #endif
+       
+       #endif
        
-               for (int i = 0; i < pref.size(); i++) {
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "getChimeras");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 
-                       //gives the actual percentage of the dme this seq adds
-                       pref[i].score[0] = pref[i].score[0] / dme;
+// Splits the sliding windows across forked child processes (unix-like platforms only).
+// Each child scores its own slice of windows (lines[process]) with driverChimeras, writes
+// that slice of pref to outputDir + <child pid> + ".temp" and exits; the parent waits for
+// every child and then loads each temp file back into pref with readPrefs.
+//   mid - midpoint of every sliding window, handed unchanged to driverChimeras.
+// Returns exitCommand as held by the parent (the child's assignment happens in a separate
+// process, so the parent still sees the initial 1), or 0 where forking is not compiled in.
+int Bellerophon::createProcesses(vector<int> mid) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               int exitCommand = 1;
+               vector<int> processIDS;
+                               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
                         
-                       //how much higher or lower is this than expected
-                       pref[i].score[0] = pref[i].score[0] / expectedPercent;
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //remember the child's pid; its temp file is named after it
+                               process++;
+                       }else if (pid == 0){
+                               exitCommand = driverChimeras(mid, lines[process]);
+                               string tempOut = outputDir + toString(getpid()) + ".temp";
+                               writePrefs(tempOut, lines[process]);
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
                 
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int status = 0;  //out-parameter only; the pid is passed separately
+                       waitpid(processIDS[i], &status, 0);
                 }
                 
-               //sort Preferences highest to lowest
-               sort(pref.begin(), pref.end(), comparePref);
+               //get info that other processes created
+               for (int i = 0; i < processIDS.size(); i++) {
+                       string tempIn = outputDir + toString(processIDS[i]) + ".temp";
+                       readPrefs(tempIn);
+               }
+               
+               return exitCommand;
+#else
+               return 0;  //fork() is not used on this platform; every path must still return a value
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "createProcesses");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+// Scores every sequence for each sliding window in [line.start, line.start + line.num).
+// For one window: the aligned sequences are cut into a left fragment (the `window` columns
+// before the midpoint) and a right fragment (the `window` columns from the midpoint on),
+// a distance matrix is built for each side, and generatePreferences accumulates the
+// per-sequence scores into pref[count].
+//   midpoints - midpoint column of every window, indexed by window number.
+//   line      - first window and number of windows this call is responsible for.
+// Returns 0 both on completion and when m->control_pressed interrupts the work.
+int Bellerophon::driverChimeras(vector<int> midpoints, linePair line) {
+       try {
+               
+               for (int h = line.start; h < (line.start + line.num); h++) {
+                       //count is a member: generatePreferences uses it to select the pref row to fill
+                       count = h;
+                       int midpoint = midpoints[h];
                 
-               for (int i = 0; i < seqs.size(); i++) { delete seqs[i];  }  seqs.clear();
+                       //initialize pref[count]                
+                       for (int i = 0; i < numSeqs; i++ ) { 
+                               pref[count][i].name = seqs[i]->getName();
+                               pref[count][i].midpoint = midpoint;  
+                       }
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       //create 2 vectors of sequences, 1 for left side and one for right side
+                       vector<Sequence> left;  vector<Sequence> right;
+                       
+                       for (int i = 0; i < seqs.size(); i++) {
+                               
+                               if (m->control_pressed) { return 0; }
+                               
+                               //cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
+                               //cout << "whole = " << seqs[i]->getAligned().length() << endl;
+                               //save left side
+                               string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
+                               Sequence tempLeft;
+                               tempLeft.setName(seqs[i]->getName());
+                               tempLeft.setAligned(seqLeft);
+                               left.push_back(tempLeft);
+                               //cout << "left = " << tempLeft.getAligned().length() << endl;                  
+                               //save right side
+                               string seqRight = seqs[i]->getAligned().substr(midpoint, window);
+                               Sequence tempRight;
+                               tempRight.setName(seqs[i]->getName());
+                               tempRight.setAligned(seqRight);
+                               right.push_back(tempRight);
+                               //cout << "right = " << seqRight.length() << endl;      
+                       }
+                       
+                       //this should be parallelized
+                       //preference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
+                       //create a matrix containing the distance from left to left and right to right
+                       //calculate distances
+                       SparseMatrix* SparseLeft = new SparseMatrix();
+                       SparseMatrix* SparseRight = new SparseMatrix();
+                       
+                       createSparseMatrix(0, left.size(), SparseLeft, left);
+                       
+                       //both matrices are raw allocations, so every early return below frees them first
+                       if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+                       
+                       createSparseMatrix(0, right.size(), SparseRight, right);
+                       
+                       if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+                       
+                       left.clear(); right.clear();
+                       vector<SeqMap> distMapRight;
+                       vector<SeqMap> distMapLeft;
+                       
+                       // Create a data structure to quickly access the distance information.
+                       //this is from thallingers reimplementation on get.oturep
+                       // It consists of a vector of distance maps, where each map contains
+                       // all distances of a certain sequence. Vector and maps are accessed
+                       // via the index of a sequence in the distance matrix
+                       distMapRight = vector<SeqMap>(numSeqs); 
+                       distMapLeft = vector<SeqMap>(numSeqs); 
+                       //cout << "left" << endl << endl;
+                       for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
+                               distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
+                               if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+                               //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                       }
+                       //cout << "right" << endl << endl;
+                       for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
+                               distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
+                               if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+                               //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                       }
+                       
+                       //the maps now hold every distance, so the sparse matrices are no longer needed
+                       delete SparseLeft;
+                       delete SparseRight;
+                       
+                       //fill preference structure
+                       generatePreferences(distMapLeft, distMapRight, midpoint);
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       //report progress
+                       if((h+1) % 10 == 0){    cout << "Processing sliding window: " << toString(h+1) <<  "\n";  m->mothurOutJustToLog("Processing sliding window: " + toString(h+1) + "\n") ;         }
+                       
+               }
                 
+               //report progress
+               //the in-loop report only fires on multiples of 10, so print the final count here when it was not one
+               if((line.start + line.num) % 10 != 0){  cout << "Processing sliding window: " << toString(line.start + line.num) <<  "\n";  m->mothurOutJustToLog("Processing sliding window: " + toString(line.start + line.num) + "\n") ;             }
+
                 return 0;
                 
         }
         catch(exception& e) {
-               m->errorOut(e, "Bellerophon", "getChimeras");
+               m->errorOut(e, "Bellerophon", "driverChimeras");
                 exit(1);
         }
 }
@@ -297,15 +521,6 @@ int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right,
                SeqMap::iterator itR;
                SeqMap::iterator itL;
                
-               //initialize pref[i]
-               for (int i = 0; i < pref.size(); i++) {
-                       pref[i].score[1] = 0.0;
-                       pref[i].closestLeft[1] = 100000.0; 
-                       pref[i].closestRight[1] = 100000.0; 
-                       pref[i].leftParent[1] = "";
-                       pref[i].rightParent[1] = "";
-               }
-       
                for (int i = 0; i < left.size(); i++) {
                        
                        SeqMap currentLeft = left[i];    //example i = 3;   currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
@@ -326,15 +541,15 @@ int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right,
                                if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
                                
                                        if (!correction) {
-                                               pref[i].score[1] += abs((itL->second - itR->second));
-                                               pref[j].score[1] += abs((itL->second - itR->second));
+                                               pref[count][i].score += abs((itL->second - itR->second));
+                                               pref[count][j].score += abs((itL->second - itR->second));
 //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
 //cout << "abs = " << abs((itL->second - itR->second)) << endl;
 //cout << i << " score = " << pref[i].score[1] << endl;
 //cout << j << " score = " << pref[j].score[1] << endl;
                                        }else {
-                                               pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
-                                               pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+                                               pref[count][i].score += abs((sqrt(itL->second) - sqrt(itR->second)));
+                                               pref[count][j].score += abs((sqrt(itL->second) - sqrt(itR->second)));
 //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
 //cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
 //cout << i << " score = " << pref[i].score[1] << endl;
@@ -342,27 +557,27 @@ int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right,
                                        }
 //cout << "pref[" << i << "].closestLeft[1] = "        <<      pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;                        
                                        //are you the closest left sequence
-                                       if (itL->second < pref[i].closestLeft[1]) {  
+                                       if (itL->second < pref[count][i].closestLeft) {  
 
-                                               pref[i].closestLeft[1] = itL->second;
-                                               pref[i].leftParent[1] = seqs[j]->getName();
+                                               pref[count][i].closestLeft = itL->second;
+                                               pref[count][i].leftParent = seqs[j]->getName();
 //cout << "updating closest left to " << pref[i].leftParent[1] << endl;
                                        }
 //cout << "pref[" << j << "].closestLeft[1] = "        <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
-                                       if (itL->second < pref[j].closestLeft[1]) { 
-                                               pref[j].closestLeft[1] = itL->second;
-                                               pref[j].leftParent[1] = seqs[i]->getName();
+                                       if (itL->second < pref[count][j].closestLeft) { 
+                                               pref[count][j].closestLeft = itL->second;
+                                               pref[count][j].leftParent = seqs[i]->getName();
 //cout << "updating closest left to " << pref[j].leftParent[1] << endl;
                                        }
                                        
                                        //are you the closest right sequence
-                                       if (itR->second < pref[i].closestRight[1]) {   
-                                               pref[i].closestRight[1] = itR->second;
-                                               pref[i].rightParent[1] = seqs[j]->getName();
+                                       if (itR->second < pref[count][i].closestRight) {   
+                                               pref[count][i].closestRight = itR->second;
+                                               pref[count][i].rightParent = seqs[j]->getName();
                                        }
-                                       if (itR->second < pref[j].closestRight[1]) {   
-                                               pref[j].closestRight[1] = itR->second;
-                                               pref[j].rightParent[1] = seqs[i]->getName();
+                                       if (itR->second < pref[count][j].closestRight) {   
+                                               pref[count][j].closestRight = itR->second;
+                                               pref[count][j].rightParent = seqs[i]->getName();
                                        }
                                        
                                }
@@ -370,55 +585,190 @@ int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right,
                
                }
                
+                               
+               return 1;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "generatePreferences");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+// Collapses the per-window preference matrix into one entry per sequence and ranks them.
+// For each sequence the window with the highest raw score is kept (the comparison is a
+// strict >, so ties keep the earliest window). Each kept score is then divided by the sum
+// of all kept scores (dme) and by the expected share 1 / best.size(), and the vector is
+// sorted with comparePref.
+// Returns the ranked vector; if m->control_pressed is seen, returns what was built so far.
+// NOTE(review): reads pref[0] unconditionally and divides by dme without checking for 0 -
+// confirm callers never reach this with no windows or with all-zero scores.
+vector<Preference> Bellerophon::getBestPref() {
+       try {
+               
+               vector<Preference> best;
                 
-                 
-               //calculate the dme
-               int count0 = 0;
-               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[1];  if (pref[i].score[1] == 0.0) { count0++; }  }
+               //for each sequence
+               for (int i = 0; i < numSeqs; i++) {
+                       
+                       //set best pref score to first one
+                       Preference temp = pref[0][i];
+                       
+                       if (m->control_pressed) { return best;  }
+                       
+                       //for each window
+                       for (int j = 1; j < pref.size(); j++) {
+                               
+                               //is this a better score
+                               if (pref[j][i].score > temp.score) {    temp = pref[j][i];              }
+                       }
+                       
+                       best.push_back(temp);
+               }
                 
-               float expectedPercent = 1 / (float) (pref.size() - count0);
-//cout << endl << "dme = " << dme << endl;
-               //recalculate prefernences based on dme
-               for (int i = 0; i < pref.size(); i++) {
+               //rank preference scores against each other
+               float dme = 0.0;
+               float expectedPercent = 1 / (float) (best.size());
                 
-                       if (m->control_pressed) {  return 0; }
-//cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl;        
-                       // gives the actual percentage of the dme this seq adds
-                       pref[i].score[1] = pref[i].score[1] / dme;
+               //dme = total of the best raw scores over all sequences
+               for (int i = 0; i < best.size(); i++) {  dme += best[i].score;  }
+       
+               for (int i = 0; i < best.size(); i++) {
+
+                       if (m->control_pressed) { return best; }
+                       
+                       //gives the actual percentage of the dme this seq adds
+                       best[i].score = best[i].score / dme;
                         
                         //how much higher or lower is this than expected
-                       pref[i].score[1] = pref[i].score[1] / expectedPercent;
+                       best[i].score = best[i].score / expectedPercent;
+               
+               }
+               
+               //sort Preferences highest to lowest
+               sort(best.begin(), best.end(), comparePref);
+
+               return best;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "getBestPref");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+// Writes one slice of the pref matrix to a temp file so another process can load it.
+// File layout: a header line "start<TAB>num", then for every window in
+// [tempLine.start, tempLine.start + tempLine.num) one line per sequence holding
+// name, leftParent, rightParent, score, closestLeft, closestRight and midpoint, tab separated.
+//   file     - path of the file to create.
+//   tempLine - first window and number of windows to write.
+// Returns 0; if m->control_pressed is seen the partial file is closed and removed first.
+int Bellerophon::writePrefs(string file, linePair tempLine) {
+       try {
+       
+               ofstream outTemp;
+               openOutputFile(file, outTemp);
+               
+               //lets you know what part of the pref matrix you are writing
+               outTemp << tempLine.start << '\t' << tempLine.num << '\n';
+               
+               for (int i = tempLine.start; i < (tempLine.start + tempLine.num); i++) {
+                       
+                       for (int j = 0; j < numSeqs; j++) {
+                               
+                               if (m->control_pressed) { outTemp.close(); remove(file.c_str()); return 0; }
+                               
+                               //'\n' rather than endl: endl would flush the file once per record, close() flushes at the end
+                               outTemp << pref[i][j].name << '\t' << pref[i][j].leftParent << '\t' << pref[i][j].rightParent << '\t';
+                               outTemp << pref[i][j].score << '\t' << pref[i][j].closestLeft << '\t' << pref[i][j].closestRight << '\t' << pref[i][j].midpoint <<  '\n';
+                       }
+               }
+               
+               outTemp.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "writePrefs");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+// Loads a temp file produced by writePrefs back into pref and then deletes the file.
+// The header line gives the first window (start) and the number of windows (num) stored;
+// the records that follow are read into pref[start] .. pref[start + num - 1].
+//   file - path of the temp file to read.
+// Returns 0; if m->control_pressed is seen the file is closed and removed before returning.
+// NOTE(review): fields are read with >>, which splits on any whitespace, so an empty
+// leftParent/rightParent in the file would shift the remaining fields - confirm writers
+// never emit empty parents.
+int Bellerophon::readPrefs(string file) {
+       try {
+       
+               ifstream inTemp;
+               openInputFile(file, inTemp);
+               
+               //zero-initialised so a failed header read yields an empty loop instead of garbage bounds
+               int start = 0, num = 0;
+               
+               //lets you know what part of the pref matrix you are reading
+               inTemp >> start >> num;  gobble(inTemp);
+               
+               //num is a count, not an end index: the slice covers windows start .. start + num - 1
+               for (int i = start; i < (start + num); i++) {
                         
-                       //pref[i].score[1] = dme / (dme - 2 * pref[i].score[1]);
+                       for (int j = 0; j < numSeqs; j++) {
+                               
+                               if (m->control_pressed) { inTemp.close(); remove(file.c_str()); return 0; }
                         
-                       //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher.
-//cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl;                                  
+                               inTemp >> pref[i][j].name >> pref[i][j].leftParent >> pref[i][j].rightParent;
+                               inTemp >> pref[i][j].score >> pref[i][j].closestLeft >> pref[i][j].closestRight >> pref[i][j].midpoint;
+                               gobble(inTemp);
+                       }
                 }
                 
-               //is this score bigger then the last score
-               for (int i = 0; i < pref.size(); i++) {  
+               inTemp.close();
+               
+               remove(file.c_str());
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "readPrefs");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+// For every sequence, picks the highest-scoring window inside the slice
+// [line.start, line.start + line.num) and serializes it as the tab-separated string
+// name, leftParent, rightParent, score (closestLeft, closestRight and midpoint are not included).
+//   line - first window and number of windows to search.
+// Returns one string per sequence, or fewer if m->control_pressed interrupts the loop.
+vector<string> Bellerophon::getBestWindow(linePair line) {
+       try {
+       
+               vector<string> best;
+                       
+               //for each sequence
+               for (int i = 0; i < numSeqs; i++) {
+                       
+                       //set best pref score to first one
+                       Preference temp = pref[line.start][i];
                         
-                       if (m->control_pressed) {  return 0; }
+                       if (m->control_pressed) { return best;  }
                         
-                       //update biggest score
-                       if (pref[i].score[1] > pref[i].score[0]) {
-                               pref[i].score[0] = pref[i].score[1];
-                               pref[i].leftParent[0] = pref[i].leftParent[1];
-                               pref[i].rightParent[0] = pref[i].rightParent[1];
-                               pref[i].closestLeft[0] = pref[i].closestLeft[1];
-                               pref[i].closestRight[0] = pref[i].closestRight[1];
-                               pref[i].midpoint = mid;
+                       //for each window
+                       for (int j = (line.start+1); j < (line.start+line.num); j++) {
+                               
+                               //is this a better score
+                               if (pref[j][i].score > temp.score) {    temp = pref[j][i];              }
                         }
                         
+                       //flatten the winner to text so it can be sent as a plain character buffer
+                       string tempString = temp.name + '\t' + temp.leftParent + '\t' + temp.rightParent + '\t' + toString(temp.score);
+                       best.push_back(tempString);
                 }
+
+               return best;
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bellerophon", "getBestWindow");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+// Stores the best-window strings received from another process into pref.
+// Each string is parsed as whitespace-separated name, leftParent, rightParent, score and
+// written to a single row of pref: row process * (iters / processors).
+//   process - number of the process the strings came from.
+//   best    - one serialized entry per sequence, indexed like the columns of pref.
+// Returns 0, also when m->control_pressed interrupts the loop.
+int Bellerophon::fillPref(int process, vector<string>& best) {
+       try {
+               //figure out where you start so you can put the best scores there
+               int numSeqsPerProcessor = iters / processors;
+               int start = process * numSeqsPerProcessor;
                 
-               return 1;
+               for (int i = 0; i < best.size(); i++) {
+               
+                       if (m->control_pressed) { return 0;  }
+                       
+                       istringstream iss (best[i],istringstream::in);
+                       
+                       //score arrives as text; convert() turns it back into a number
+                       string tempScore;
+                       iss >> pref[start][i].name >> pref[start][i].leftParent >> pref[start][i].rightParent >> tempScore;
+                       convert(tempScore, pref[start][i].score); 
+               }
 
+               return 0;
         }
         catch(exception& e) {
-               m->errorOut(e, "Bellerophon", "generatePreferences");
+               m->errorOut(e, "Bellerophon", "fillPref");
                 exit(1);
         }
 }
+
 /**************************************************************************************************/
 
index 3d05617d50ad0a9dccb05da2d21bc872e8ca842f..1333ec8f16d8d0985a2b289b65294ef1f94a95f3 100644 (file)
@@ -12,7 +12,6 @@
 
 
 #include "chimera.h"
-#include "filterseqscommand.h"
 #include "sparsematrix.hpp"
 #include "sequence.hpp"
 #include "dist.h"
@@ -25,22 +24,41 @@ typedef map<int, float> SeqMap;  //maps sequence to all distance for that seqeun
+//Chimera subclass. public entry points are getChimeras() and the print() overloads.
 class Bellerophon : public Chimera {
        
        public:
-               Bellerophon(string, string);    
-               ~Bellerophon() {};
+               Bellerophon(string, bool, bool, int, int, int, string); //fastafile, filter, correction, window, increment, processors, outputDir);     
+               //destructor frees distCalculator and every Sequence* stored in seqs
+               //NOTE(review): the class owns raw pointers but declares no copy constructor/assignment - copying an instance would double delete
+               ~Bellerophon() { delete distCalculator; for (int i = 0; i < seqs.size(); i++) { delete seqs[i];  }  seqs.clear(); }
                
                int getChimeras();
                int print(ostream&, ostream&);
                
+               #ifdef USE_MPI
+               int print(MPI_File&, MPI_File&);
+               #endif
+               
        private:
+               //start/num pair kept in lines
+               //NOTE(review): the constructor takes a long int but start is an int, so large values are narrowed
+               struct linePair {
+                       int start;
+                       int num;
+                       linePair(long int i, int j) : start(i), num(j) {}
+               };
+               
+               vector<linePair> lines;
+       
                Dist* distCalculator;
-               FilterSeqsCommand* filterSeqs;
                vector<Sequence*> seqs;
-               vector<Preference> pref;
+               vector< vector<Preference> > pref; //pref[0] = preference scores for all seqs in window 0.
                string fastafile;
-               int iters;
+               int iters, count, window, increment, numSeqs, processors; //iters = number of windows
+               bool correction;
                
                int generatePreferences(vector<SeqMap>, vector<SeqMap>, int);
                int createSparseMatrix(int, int, SparseMatrix*, vector<Sequence>);
+               vector<Preference> getBestPref();
+               int driverChimeras(vector<int>, linePair);
+               int createProcesses(vector<int>);
+               int writePrefs(string, linePair);
+               int readPrefs(string);
+               vector<string> getBestWindow(linePair line); //one "name leftParent rightParent score" string per entry
+               int fillPref(int, vector<string>&); //process number, strings in the getBestWindow layout
 };
 
 /***********************************************************/
index 17db069cdedb8a019d980c32fc4b6e7767fab27b..780afe0d485627fb63c3d7265f55b2aee19147fb 100644 (file)
@@ -25,6 +25,19 @@ gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
        queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
        blastFileName = toString(randNumber) + ".blast";
 
+}
+/**************************************************************************************************/
+
+//default constructor: takes no scoring parameters.
+//sets up the same random-numbered temporary file names as the four float constructor.
+BlastDB::BlastDB() : Database() {
+       
+       globaldata = GlobalData::getInstance();
+       count = 0;
+       
+       //give the scoring parameters a defined value. they used to be left uninitialized here,
+       //so MPISend (or anything else reading them before MPIRecv ran) read indeterminate floats.
+       gapOpen = 0.0;  gapExtend = 0.0;  match = 0.0;  misMatch = 0.0;
+
+       //all three file names share one random number
+       int randNumber = rand();
+       dbFileName = toString(randNumber) + ".template.unaligned.fasta";
+       queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
+       blastFileName = toString(randNumber) + ".blast";
+
 }
 
 /**************************************************************************************************/
@@ -181,6 +194,56 @@ void BlastDB::generateDB() {
                exit(1);
        }
 }
+#ifdef USE_MPI 
+/**************************************************************************************************/
+int BlastDB::MPISend(int receiver) {
+       try {
+               //the four scoring parameters go out one float at a time, all with tag 2001.
+               //the order here (gapOpen, gapExtend, match, misMatch) is the order MPIRecv reads them in.
+               void* outgoing[4] = { &gapOpen, &gapExtend, &match, &misMatch };
+               
+               for (int which = 0; which < 4; which++) {
+                       MPI_Send(outgoing[which], 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD);
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "BlastDB", "MPISend");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int BlastDB::MPIRecv(int sender) {
+       try {
+               //one float per message, all with tag 2001, stored in the order
+               //gapOpen, gapExtend, match, misMatch - the same order MPISend uses.
+               void* incoming[4] = { &gapOpen, &gapExtend, &match, &misMatch };
+               MPI_Status status;
+               
+               for (int which = 0; which < 4; which++) {
+                       MPI_Recv(incoming[which], 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "BlastDB", "MPIRecv");
+               exit(1);
+       }
+}
+#endif 
+/**************************************************************************************************/
 
 /**************************************************************************************************/
 
index 0f8fccd7f19507737642e218e4a149c5f4a60e0b..d61aaecb5e0712b47458e000416b881719cc7e65 100644 (file)
@@ -18,13 +18,19 @@ class BlastDB : public Database {
 
 public:
        BlastDB(float, float, float, float);
+       BlastDB();
        ~BlastDB();
        
        void generateDB();
        void addSequence(Sequence);
        vector<int> findClosestSequences(Sequence*, int);
        vector<int> findClosestMegaBlast(Sequence*, int);
-
+       
+       #ifdef USE_MPI  
+       int MPISend(int); //just sends gapOpen, gapExtend, match and mismatch
+       int MPIRecv(int);
+       #endif
+       
 private:
        string dbFileName;
        string queryFileName;
index 3aad3f6b48780505cb5f88c733b28027e28968b1..56856a9e4ac545627068eec2786dfdb4c476d1fb 100644 (file)
--- a/ccode.cpp
+++ b/ccode.cpp
 
 
 //***************************************************************************************************************
-Ccode::Ccode(string filename, string o) {  
-       fastafile = filename;  outputDir = o; 
+//arguments: fasta, template, filter, mask, window, numWanted, outputDir.
+//reads the template sequences, sets the mask and starts the mapinfo file with its header line.
+//with USE_MPI the mapinfo file is opened by every process and stays open in outMap; only process 0 writes the header.
+Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() {  
+       fastafile = filename;  
+       outputDir = o; 
+       templateFileName = temp;  templateSeqs = readSeqs(temp);
+       setMask(mask);
+       filter = f;
+       window = win;
+       numWanted = numW;
+       
        distCalc = new eachGapDist();
        decalc = new DeCalculator();
        
        mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
-       ofstream out2;
-       openOutputFile(mapInfo, out2);
+       
+       #ifdef USE_MPI
                 
-       out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
-       out2.close();
+               //strcpy also writes the terminating '\0', so the buffer needs length() + 1 chars
+               char inFileName[mapInfo.length() + 1];
+               strcpy(inFileName, mapInfo.c_str());
+               
+               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+
+               MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap);  //comm, filename, mode, info, filepointer
+               
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               if (pid == 0) {
+                       string outString = "Place in masked, filtered and trimmed sequence\tPlace in original alignment\n";
+                       
+                       MPI_Status status;
+                       int length = outString.length();
+                       //one extra char for the '\0' strcpy appends; only length chars are written to the file
+                       char buf2[length + 1];
+                       strcpy(buf2, outString.c_str()); 
+                               
+                       MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status);
+               }
+       #else
+
+               ofstream out2;
+               openOutputFile(mapInfo, out2);
+               
+               out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+               out2.close();
+       #endif
 }
 //***************************************************************************************************************
+//frees the two calculators allocated in the constructor; with USE_MPI also closes outMap (the mapinfo file opened in the constructor)
 Ccode::~Ccode() {
        delete distCalc;
        delete decalc;
+       
+       #ifdef USE_MPI
+               MPI_File_close(&outMap);
+       #endif
 }      
 //***************************************************************************************************************
-void Ccode::printHeader(ostream& out) {
-       out << "For full window mapping info refer to " << mapInfo << endl << endl;
-}
-//***************************************************************************************************************
 int Ccode::print(ostream& out, ostream& outAcc) {
        try {
                
@@ -116,13 +149,136 @@ int Ccode::print(ostream& out, ostream& outAcc) {
                //free memory
                for (int i = 0; i < closest.size(); i++) {  delete closest[i].seq;  }
 
-               return 0;
+               return results;
        }
        catch(exception& e) {
                m->errorOut(e, "Ccode", "print");
                exit(1);
        }
 }
+#ifdef USE_MPI
+//***************************************************************************************************************
+//MPI version of print: builds the report for the current query as strings and appends them with MPI_File_write_shared.
+//  out    - report file (reference sequences, window mapping, per window statistics and flags)
+//  outAcc - accnos file, gets the query name when at least one window is flagged by all three tests
+//the spot map for the query goes to the mapinfo file through printMapping.
+//returns 1 if the query had such a window, 0 otherwise. frees the Sequence* held in closest.
+int Ccode::print(MPI_File& out, MPI_File& outAcc) {
+       try {
+               
+               string outMapString = "";
+               
+               outMapString += querySeq->getName() + "\n";
+               for (it = spotMap.begin(); it!= spotMap.end(); it++) {
+                       outMapString += toString(it->first)  + "\t" + toString(it->second)  + "\n";
+               }
+               printMapping(outMapString);
+               outMapString = "";
+               
+               string outString = "";
+               string outAccString = "";
+               
+               outString +=  querySeq->getName() + "\n\nReference sequences used and distance to query:\n";
+                       
+               for (int j = 0; j < closest.size(); j++) {
+                       outString += closest[j].seq->getName() + "\t" + toString(closest[j].dist) + "\n";
+               }
+               outString += "\n\nMapping information: ";
+               
+               //for each window
+               //window mapping info.
+               //you mask and did not filter
+               if ((seqMask != "") && (!filter)) { outString += "mask and trim."; }
+                               
+               //you filtered and did not mask
+               if ((seqMask == "") && (filter)) { outString += "filter and trim."; }
+                               
+               //you masked and filtered
+               if ((seqMask != "") && (filter)) { outString += "mask, filter and trim."; }
+                       
+               outString += "\nWindow\tStartPos\tEndPos\n";
+               it = trim.begin();
+               //NOTE(review): windows.size()-1 is unsigned, so an empty windows vector would wrap around here - confirm windows is never empty
+               for (int k = 0; k < windows.size()-1; k++) {
+                       outString += toString(k+1) + "\t" + toString(spotMap[windows[k]-it->first]) + "\t" + toString(spotMap[windows[k]-it->first+windowSizes]) + "\n";
+               }
+                       
+               //the last window ends at the trimmed end of the sequence instead of start + windowSizes
+               outString += toString(windows.size()) + "\t" + toString(spotMap[windows[windows.size()-1]-it->first]) + "\t" + toString(spotMap[it->second-it->first-1]) + "\n\n";
+               
+               outString += "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova\n";
+               for (int k = 0; k < windows.size(); k++) {
+                       float ds = averageQuery[k] / averageRef[k]; 
+                       outString += toString(k+1) + "\t" + toString(averageQuery[k]) + "\t" + toString(sdQuery[k]) + "\t" + toString(averageRef[k]) + "\t" + toString(sdRef[k]) + "\t" + toString(ds) + "\t" + toString(anova[k]) + "\n";
+               }
+                       
+               //varRef
+               //varQuery
+               /* F test for differences among variances.
+               * varQuery is expected to be higher or similar than varRef */
+               //float fs = varQuery[query] / varRef[query];   /* F-Snedecor, test for differences of variances */
+                       
+               bool results = false;   
+                                       
+               //confidence limit, t - Student, anova
+               outString += "\nWindow\tConfidenceLimit\tt-Student\tAnova\n";
+                       
+               for (int k = 0; k < windows.size(); k++) {
+                       string temp = "";
+                       if (isChimericConfidence[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
+                               
+                       if (isChimericTStudent[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
+                               
+                       if (isChimericANOVA[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
+                       
+                       outString += toString(k+1) + "\t" + temp + "\n";
+                               
+                       //a window only counts when all three tests flag it
+                       if (temp == "*\t*\t*\t") {  results = true;  }
+               }
+               outString += "\n";      
+               
+               MPI_Status status;
+               int length = outString.length();
+               //strcpy also writes the terminating '\0', so the buffer needs length + 1 chars; only length chars go to the file
+               char buf2[length + 1];
+               strcpy(buf2, outString.c_str()); 
+                               
+               MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
+                       
+               if (results) {
+                       m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine();
+                       outAccString += querySeq->getName() + "\n";
+                       
+                       MPI_Status statusAcc;
+                       length = outAccString.length();
+                       //same as above: room for the '\0' that strcpy appends
+                       char buf[length + 1];
+                       strcpy(buf, outAccString.c_str()); 
+                               
+                       MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
+               }
+
+               //free memory
+               for (int i = 0; i < closest.size(); i++) {  delete closest[i].seq;  }
+
+               return results;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Ccode", "print");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+//appends output to outMap, the mapinfo file opened in the constructor. returns 0.
+int Ccode::printMapping(string& output) {
+       try {
+                       MPI_Status status;
+                       int length = output.length();
+                       
+                       //strcpy also writes the terminating '\0', so the buffer needs length + 1 chars; only length chars go to the file
+                       char buf[length + 1];
+                       strcpy(buf, output.c_str()); 
+                               
+                       MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status);
+
+                       //the function is declared int; it used to fall off the end without returning a value
+                       return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Ccode", "printMapping");
+               exit(1);
+       }
+}
+#endif
 //***************************************************************************************************************
 int Ccode::getChimeras(Sequence* query) {
        try {
diff --git a/ccode.h b/ccode.h
index afc77cce5d9c003bc94ad16f970819a8f5c4680e..91ef84517053c22129c0257daddf11dbcf50d041 100644 (file)
--- a/ccode.h
+++ b/ccode.h
 class Ccode : public Chimera {
        
        public:
-               Ccode(string, string);  
+               Ccode(string, string, bool, string, int, int, string);  //fasta, template, filter, mask, window, numWanted, outputDir
                ~Ccode();
                
                int getChimeras(Sequence* query);
                int print(ostream&, ostream&);
-               void printHeader(ostream&);             
+               
+               #ifdef USE_MPI
+               int print(MPI_File&, MPI_File&);
+               #endif
        private:
        
                Dist* distCalc;
                DeCalculator* decalc;
-               int iters;
+               int iters, window, numWanted;
                string fastafile, mapInfo;
                
                Sequence* querySeq;
@@ -75,6 +78,12 @@ class Ccode : public Chimera {
                int getDiff(string, string);  //return number of mismatched bases, a gap to base is not counted as a mismatch
                float getT(int); 
                float getF(int); 
+               
+               #ifdef USE_MPI
+               int printMapping(string&);
+               MPI_File outMap;
+               #endif
+
 };
 
 /***********************************************************/
index 7eeca96316511531532232d56519e1900cd7a9c6..692a4fee837376232981cfaf91934b96b5efa75d 100644 (file)
@@ -42,7 +42,6 @@ string Chimera::createFilter(vector<Sequence*> seqs, float t) {
                        }
                }
                
-               //zero out spot where all sequences have blanks
                //zero out spot where all sequences have blanks
                int numColRemoved = 0;
                for(int i = 0;i < seqs[0]->getAligned().length(); i++){
@@ -55,7 +54,8 @@ string Chimera::createFilter(vector<Sequence*> seqs, float t) {
                        //cout << "a = " << a[i] <<  " t = " << t[i] <<  " g = " << g[i] <<  " c = " << c[i] << endl;
                }
 
-               m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  m->mothurOutEndLine();
+               if (threshold != 0.0) {  m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  m->mothurOutEndLine();  }
+               
                return filterString;
        }
        catch(exception& e) {
@@ -93,14 +93,68 @@ map<int, int> Chimera::runFilter(Sequence* seq) {
 vector<Sequence*> Chimera::readSeqs(string file) {
        try {
        
-               m->mothurOut("Reading sequences... "); cout.flush();
-               ifstream in;
-               openInputFile(file, in);
-               
                vector<Sequence*> container;
                int count = 0;
                length = 0;
                unaligned = false;
+
+               m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+               
+               #ifdef USE_MPI  
+                       int pid;
+                       vector<long> positions;
+                       int numSeqs;
+               
+                       MPI_Status status; 
+                       MPI_File inMPI;
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+       
+                       char inFileName[file.length()];
+                       strcpy(inFileName, file.c_str());
+       
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+       
+                       if (pid == 0) {
+                               positions = setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
+
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
+                       }else{
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               positions.resize(numSeqs+1);
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                       }
+                       
+                       //read file 
+                       for(int i=0;i<numSeqs;i++){
+                       
+                               if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
+       
+                               //read next sequence
+                               int seqlength = positions[i+1] - positions[i];
+                               char buf4[seqlength];
+                               MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+                               
+                               string tempBuf = buf4;
+                               if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
+                               
+                               istringstream iss (tempBuf,istringstream::in);
+               
+                               Sequence* current = new Sequence(iss);   
+                               if (current->getName() != "") {
+                                       if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
+                                       else if (length != current->getAligned().length()) {    unaligned = true;       }
+                       
+                                       container.push_back(current);  
+                               }
+                       }
+                       
+                       MPI_File_close(&inMPI);
+       #else
+
+               ifstream in;
+               openInputFile(file, in);
                
                //read in seqs and store in vector
                while(!in.eof()){
@@ -110,14 +164,13 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                        Sequence* current = new Sequence(in);  gobble(in);
                        
                        if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
-                       else if (length != current->getAligned().length()) { //seqs are unaligned
-                               unaligned = true;
-                       }
-                       
+                       else if (length != current->getAligned().length()) {    unaligned = true;       }
+                                               
                        if (current->getName() != "") {  container.push_back(current);  }
                }
-               
                in.close();
+       #endif
+       
                m->mothurOut("Done."); m->mothurOutEndLine();
                
                return container;
@@ -137,64 +190,53 @@ void Chimera::setMask(string filename) {
                }else if (filename == "") {  //do nothing 
                        seqMask = "";
                }else{
+               
+       #ifdef USE_MPI  
+                       MPI_File inMPI;
+                       MPI_Offset size;
+                       MPI_Status status;
+                       
+                       char inFileName[filename.length()];
+                       strcpy(inFileName, filename.c_str());
+       
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       MPI_File_get_size(inMPI, &size);
+                       
+                       char buffer[size];
+                       MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
+                       
+                       string tempBuf = buffer;
+                       if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
+                       istringstream iss (tempBuf,istringstream::in);
+                       
+                       if (!iss.eof()) {
+                               Sequence temp(iss);
+                               seqMask = temp.getAligned();
+                       }else {
+                               m->mothurOut("Problem with mask."); m->mothurOutEndLine(); 
+                               seqMask = "";
+                       }
+                       
+                       MPI_File_close(&inMPI);
+       #else
+       
                        ifstream infile;
                        openInputFile(filename, infile);
                        
-                       while (!infile.eof()) {
+                       if (!infile.eof()) {
                                Sequence temp(infile);
                                seqMask = temp.getAligned();
-                               
-                               gobble(infile);
+                       }else {
+                               m->mothurOut("Problem with mask."); m->mothurOutEndLine(); 
+                               seqMask = "";
                        }
-                       
                        infile.close();
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Chimera", "setMask");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-
-vector< vector<float> > Chimera::readQuantiles() {
-       try {
-       
-               ifstream in;
-               openInputFile(quanfile, in);
-               
-               vector< vector<float> > quan;
-               vector <float> temp; temp.resize(6, 0);
-               
-               //to fill 0
-               quan.push_back(temp); 
-       
-               int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
-               
-               while(!in.eof()){
-                       
-                       in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
-                       
-                       temp.clear();
-                       
-                       temp.push_back(ten); 
-                       temp.push_back(twentyfive);
-                       temp.push_back(fifty);
-                       temp.push_back(seventyfive);
-                       temp.push_back(ninetyfive);
-                       temp.push_back(ninetynine);
-                       
-                       quan.push_back(temp);  
+       #endif
        
-                       gobble(in);
                }
-               
-               in.close();
-               return quan;
-               
        }
        catch(exception& e) {
-               m->errorOut(e, "Chimera", "readQuantiles");
+               m->errorOut(e, "Chimera", "setMask");
                exit(1);
        }
 }
index ef62b5357981272c93cd2c9869c5b507f0c1daaa..11bc435938b99d63df86958667b801f34318a63a 100644 (file)
--- a/chimera.h
+++ b/chimera.h
 /***********************************************************************/
+//one preference record for a sequence. the default constructor gives empty names, score 0.0,
+//closestLeft/closestRight 10000.0 and midpoint 0.
 struct Preference {
                string name;
-               vector<string> leftParent; //keep the name of closest left associated with the two scores
-               vector<string> rightParent; //keep the name of closest right associated with the two scores
-               vector<float> score;  //so you can keep last score and calc this score and keep whichever is bigger.
-               vector<float> closestLeft;  //keep the closest left associated with the two scores
-               vector<float> closestRight; //keep the closest right associated with the two scores
+               string leftParent; //keep the name of closest left
+               string rightParent; //keep the name of closest right
+               float score;  //preference score
+               float closestLeft;  //keep the closest left; presumably a distance, starts at 10000.0 - TODO confirm
+               float closestRight; //keep the closest right; presumably a distance, starts at 10000.0 - TODO confirm
                int midpoint;
-
+               Preference() { name = ""; leftParent = ""; rightParent = ""; score = 0.0; closestLeft = 10000.0; closestRight = 10000.0; midpoint = 0;  }
+               ~Preference() {}
 };
 /***********************************************************************/
 struct score_struct {
@@ -88,38 +89,38 @@ class Chimera {
 
        public:
        
-               Chimera(){ m = MothurOut::getInstance(); }
-               Chimera(string) { m = MothurOut::getInstance(); }
-               Chimera(string, bool, string) { m = MothurOut::getInstance(); }
-               Chimera(string, string) { m = MothurOut::getInstance(); }
+               Chimera(){ m = MothurOut::getInstance(); length = 0; unaligned = false; }
+               //Chimera(string) { m = MothurOut::getInstance(); }
+               //Chimera(string, bool, string) { m = MothurOut::getInstance(); }
+               //Chimera(string, string) { m = MothurOut::getInstance(); }
                virtual ~Chimera(){     for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i];  } };
-               virtual void setFilter(bool f)                  {       filter = f;                     }
-               virtual void setCorrection(bool c)              {       correction = c;         }
-               virtual void setProcessors(int p)               {       processors = p;         }
-               virtual void setWindow(int w)                   {       window = w;                     }
-               virtual void setIncrement(int i)                {       increment = i;          }
-               virtual void setNumWanted(int n)                {       numWanted = n;          }
-               virtual void setKmerSize(int k)                 {       kmerSize = k;           }
-               virtual void setSVG(int s)                              {       svg = s;                        }
-               virtual void setName(string n)                  {       name = n;                       }
-               virtual void setMatch(int m)                    {       match = m;                      }
-               virtual void setMisMatch(int m)                 {       misMatch = m;           }
-               virtual void setDivR(float d)                   {       divR = d;                       }
-               virtual void setParents(int p)                  {       parents = p;            }
-               virtual void setMinSim(int s)                   {       minSim = s;                     }
-               virtual void setMinCoverage(int c)              {       minCov = c;                     }
-               virtual void setMinBS(int b)                    {       minBS = b;                      }
-               virtual void setMinSNP(int s)                   {       minSNP = s;                     }
-               virtual void setIters(int i)                    {       iters = i;                      }
+               //virtual void setFilter(bool f)                        {       filter = f;                     }
+               //virtual void setCorrection(bool c)            {       correction = c;         }
+               //virtual void setProcessors(int p)             {       processors = p;         }
+               //virtual void setWindow(int w)                 {       window = w;                     }
+               //virtual void setIncrement(int i)              {       increment = i;          }
+               //virtual void setNumWanted(int n)              {       numWanted = n;          }
+               //virtual void setKmerSize(int k)                       {       kmerSize = k;           }
+               //virtual void setSVG(int s)                            {       svg = s;                        }
+               //virtual void setName(string n)                        {       name = n;                       }
+               //virtual void setMatch(int m)                  {       match = m;                      }
+               //virtual void setMisMatch(int m)                       {       misMatch = m;           }
+               //virtual void setDivR(float d)                 {       divR = d;                       }
+               //virtual void setParents(int p)                        {       parents = p;            }
+               //virtual void setMinSim(int s)                 {       minSim = s;                     }
+               //virtual void setMinCoverage(int c)            {       minCov = c;                     }
+               //virtual void setMinBS(int b)                  {       minBS = b;                      }
+               //virtual void setMinSNP(int s)                 {       minSNP = s;                     }
+               //virtual void setIters(int i)                  {       iters = i;                      }
                virtual bool getUnaligned()                             {       return unaligned;                       }
-               virtual void setTemplateFile(string t)  {   templateFileName = t;       templateSeqs = readSeqs(t);  }
+               //virtual void setTemplateFile(string t)        {   templateFileName = t;       templateSeqs = readSeqs(t);  }
                virtual int getLength()                                 {   return length;      }
                
-               virtual void setCons(string){};
-               virtual void setQuantiles(string){};
-               virtual int doPrep(){ return 0; }
+               //virtual void setCons(string){};
+               //virtual void setQuantiles(string){};
+               //virtual int doPrep(){ return 0; }
                virtual vector<Sequence*> readSeqs(string);
-               virtual vector< vector<float> > readQuantiles();
+               //virtual vector< vector<float> > readQuantiles();
                virtual void setMask(string);
                virtual map<int, int> runFilter(Sequence*);
                virtual string createFilter(vector<Sequence*>, float);
@@ -127,16 +128,20 @@ class Chimera {
                virtual void printHeader(ostream&){};
                virtual int getChimeras(Sequence*){ return 0; }
                virtual int getChimeras(){ return 0; }
-               virtual int print(ostream&, ostream&){ return 0; }
+               virtual int print(ostream&, ostream&){  return 0; }
+               
+               #ifdef USE_MPI
+               virtual int print(MPI_File&, MPI_File&){  return 0; }
+               #endif
                
                
        protected:
                
                vector<Sequence*> templateSeqs;
-               bool filter, correction, svg, unaligned;
-               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, length;
-               float divR;
-               string seqMask, quanfile, filterString, name, outputDir, templateFileName;
+               bool filter, unaligned; //  correction, svg,
+               int length; //processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters,
+               //float divR;
+               string seqMask, filterString, outputDir, templateFileName; //quanfile, name, 
                Sequence* getSequence(string);  //find sequence from name       
                MothurOut* m;
 };
diff --git a/chimerabellerophoncommand.cpp b/chimerabellerophoncommand.cpp
new file mode 100644 (file)
index 0000000..f36d7a0
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ *  chimerabellerophoncommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 4/1/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimerabellerophoncommand.h"
+#include "bellerophon.h"
+
+//***************************************************************************************************************
+
+//Parses the option string for chimera.bellerophon and stores the settings in the members.
+//abort is set when help was requested, when isValidParameter rejects a parameter,
+//or when validFile reports the fasta file as "not open" or "not found".
+ChimeraBellerophonCommand::ChimeraBellerophonCommand(string option)  {
+       try {
+               abort = false;
+               
+               //a help request prints the usage text and leaves the command aborted
+               if (option == "help") {
+                       help();
+                       abort = true;
+                       return;
+               }
+               
+               //parameter names this command accepts
+               string allowed[] =  {"fasta","filter","correction","processors","window","increment","outputdir","inputdir"};
+               vector<string> allowedNames (allowed, allowed+(sizeof(allowed)/sizeof(string)));
+               
+               OptionParser parser(option);
+               map<string,string> parameters = parser.getParameters();
+               
+               ValidParameters validParameter;
+               
+               //every supplied parameter is checked; a rejected one aborts the command
+               for (map<string,string>::iterator entry = parameters.begin(); entry != parameters.end(); ++entry) {
+                       if (validParameter.isValidParameter(entry->first, allowedNames, entry->second) != true) {  abort = true;  }
+               }
+               
+               //when an input directory is supplied, it is prepended to a fasta name that carries no path
+               string inputDir = validParameter.validFile(parameters, "inputdir", false);
+               if (inputDir != "not found") {
+                       map<string,string>::iterator fastaEntry = parameters.find("fasta");
+                       if (fastaEntry != parameters.end()) {
+                               string path = hasPath(fastaEntry->second);
+                               if (path == "") {  parameters["fasta"] = inputDir + fastaEntry->second;  }
+                       }
+               }
+               else {  inputDir = "";  }
+               
+               //fasta is the only required parameter
+               fastafile = validParameter.validFile(parameters, "fasta", true);
+               if (fastafile == "not found") {
+                       fastafile = "";
+                       m->mothurOut("fasta is a required parameter for the chimera.bellerophon command.");
+                       m->mothurOutEndLine();
+                       abort = true;
+               }
+               else if (fastafile == "not open") {  abort = true;  }
+               
+               //without an explicit output directory, use whatever path the fasta file name carries
+               outputDir = validParameter.validFile(parameters, "outputdir", false);
+               if (outputDir == "not found") {  outputDir = hasPath(fastafile);  }
+               
+               //optional settings; a missing one falls back to the default assigned below
+               string value = validParameter.validFile(parameters, "filter", false);
+               if (value == "not found") {  value = "F";  }
+               filter = isTrue(value);
+               
+               value = validParameter.validFile(parameters, "correction", false);
+               if (value == "not found") {  value = "T";  }
+               correction = isTrue(value);
+               
+               value = validParameter.validFile(parameters, "processors", false);
+               if (value == "not found") {  value = "1";  }
+               convert(value, processors);
+               
+               value = validParameter.validFile(parameters, "window", false);
+               if (value == "not found") {  value = "0";  }
+               convert(value, window);
+               
+               value = validParameter.validFile(parameters, "increment", false);
+               if (value == "not found") {  value = "25";  }
+               convert(value, increment);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraBellerophonCommand", "ChimeraBellerophonCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+//Prints the usage text for the chimera.bellerophon command through m->mothurOut.
+//The MPI note is only compiled in when USE_MPI is defined.
+void ChimeraBellerophonCommand::help(){
+       try {
+               m->mothurOut("The chimera.bellerophon command reads a fastafile and creates list of potentially chimeric sequences.\n");
+               m->mothurOut("The chimera.bellerophon command parameters are fasta, filter, correction, processors, window, increment. The fasta parameter is required.\n");
+               m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter, default=false. \n");
+               m->mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
+               m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
+               #ifdef USE_MPI
+               m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
+               #endif
+               m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default is 1/4 sequence length. \n");
+               m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default is 25.\n");
+               m->mothurOut("chimera.bellerophon(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors) \n");
+               //the example must name this command, not chimera.seqs
+               m->mothurOut("Example: chimera.bellerophon(fasta=AD.align, filter=True, correction=true, window=200) \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraBellerophonCommand", "help");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+ChimeraBellerophonCommand::~ChimeraBellerophonCommand(){
+       //intentionally empty
+}
+
+//***************************************************************************************************************
+
+//Runs the Bellerophon check on fastafile and writes two files into outputDir:
+//getRootName(getSimpleName(fastafile)) + "bellerophon.chimeras" and + "bellerophon.accnos".
+//The accnos file is removed again when isBlank() reports it as blank.
+//Returns 0 on every path; returns immediately when abort is set.
+int ChimeraBellerophonCommand::execute(){
+       try{
+               
+               if (abort == true) { return 0; }
+               
+               int start = time(NULL); 
+               
+               chimera = new Bellerophon(fastafile, filter, correction, window, increment, processors, outputDir);     
+                               
+               string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) +  "bellerophon.chimeras";
+               string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + "bellerophon.accnos";
+               bool hasAccnos = true;
+               
+               chimera->getChimeras();
+               
+               if (m->control_pressed) { delete chimera;       return 0;       }
+               
+       #ifdef USE_MPI
+               MPI_File outMPI;
+               MPI_File outMPIAccnos;
+               
+               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+               
+               //MPI_File_open is handed a null-terminated C string, so each buffer copies length()+1
+               //bytes from c_str() to include the terminator (a length()-sized array was overrun by strcpy)
+               vector<char> outFilename(accnosFileName.c_str(), accnosFileName.c_str() + accnosFileName.length() + 1);
+               vector<char> FileName(outputFileName.c_str(), outputFileName.c_str() + outputFileName.length() + 1);
+
+               MPI_File_open(MPI_COMM_WORLD, &FileName[0], outMode, MPI_INFO_NULL, &outMPI);  //comm, filename, mode, info, filepointer
+               MPI_File_open(MPI_COMM_WORLD, &outFilename[0], outMode, MPI_INFO_NULL, &outMPIAccnos);
+               
+               numSeqs = chimera->print(outMPI, outMPIAccnos);
+               
+               MPI_File_close(&outMPI);
+               MPI_File_close(&outMPIAccnos);
+
+       #else
+       
+               ofstream out;
+               openOutputFile(outputFileName, out);
+               
+               ofstream out2;
+               openOutputFile(accnosFileName, out2);
+               
+               numSeqs = chimera->print(out, out2);
+               out.close();
+               out2.close(); 
+               
+       #endif
+               
+               //an interrupt after printing discards both output files
+               if (m->control_pressed) { remove(accnosFileName.c_str()); remove(outputFileName.c_str()); delete chimera;       return 0;       }
+               
+               //delete accnos file if its blank 
+               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
+               m->mothurOutEndLine();
+               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+               
+               delete chimera;
+               
+               return 0;
+                               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraBellerophonCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
diff --git a/chimerabellerophoncommand.h b/chimerabellerophoncommand.h
new file mode 100644 (file)
index 0000000..e450b52
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef CHIMERABELLEROPHONCOMMAND_H
+#define CHIMERABELLEROPHONCOMMAND_H
+
+/*
+ *  chimerabellerophoncommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 4/1/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "chimera.h"
+
+
+/***********************************************************/
+
+/* Command class for chimera.bellerophon: the constructor parses the option string, execute() runs the check. */
+class ChimeraBellerophonCommand : public Command {
+public:
+       ChimeraBellerophonCommand(string);
+       ~ChimeraBellerophonCommand();
+       int execute();
+       void help();
+               
+private:
+
+       bool abort;             //when true, execute() returns 0 without doing any work
+       bool filter;            //value of the "filter" parameter
+       bool correction;        //value of the "correction" parameter
+       string fastafile;       //value of the required "fasta" parameter
+       string outputDir;       //prefix used by execute() when building the output file names
+       int processors;         //value of the "processors" parameter
+       int window;             //value of the "window" parameter
+       int increment;          //value of the "increment" parameter
+       int numSeqs;            //set by execute() from the return value of chimera->print()
+       Chimera* chimera;       //allocated and deleted inside execute()
+};
+
+/***********************************************************/
+
+#endif
+
+
diff --git a/chimeraccodecommand.cpp b/chimeraccodecommand.cpp
new file mode 100644 (file)
index 0000000..748163e
--- /dev/null
@@ -0,0 +1,574 @@
+/*
+ *  chimeraccodecommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 3/30/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimeraccodecommand.h"
+#include "ccode.h"
+
+//***************************************************************************************************************
+
+//Parses the option string for chimera.ccode and stores the settings in the members.
+//abort is set when help was requested, when isValidParameter rejects a parameter,
+//when validFile reports fasta or template as "not open" or "not found",
+//or when openInputFile returns 1 for a user-supplied mask file.
+ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
+       try {
+               abort = false;
+               
+               //a help request prints the usage text and leaves the command aborted
+               if (option == "help") {
+                       help();
+                       abort = true;
+                       return;
+               }
+               
+               //parameter names this command accepts
+               string allowed[] =  {"fasta", "filter", "processors", "window", "template", "mask", "numwanted", "outputdir","inputdir"};
+               vector<string> allowedNames (allowed, allowed+(sizeof(allowed)/sizeof(string)));
+               
+               OptionParser parser(option);
+               map<string,string> parameters = parser.getParameters();
+               
+               ValidParameters validParameter;
+               
+               //every supplied parameter is checked; a rejected one aborts the command
+               for (map<string,string>::iterator entry = parameters.begin(); entry != parameters.end(); ++entry) {
+                       if (validParameter.isValidParameter(entry->first, allowedNames, entry->second) != true) {  abort = true;  }
+               }
+               
+               //when an input directory is supplied, it is prepended to fasta and template names that carry no path
+               string inputDir = validParameter.validFile(parameters, "inputdir", false);
+               if (inputDir != "not found") {
+                       string fileParams[] = {"fasta", "template"};
+                       for (int i = 0; i < 2; i++) {
+                               map<string,string>::iterator found = parameters.find(fileParams[i]);
+                               if (found == parameters.end()) {  continue;  }
+                               if (hasPath(found->second) == "") {  parameters[fileParams[i]] = inputDir + found->second;  }
+                       }
+               }
+               else {  inputDir = "";  }
+               
+               //fasta is required
+               fastafile = validParameter.validFile(parameters, "fasta", true);
+               if (fastafile == "not found") {
+                       fastafile = "";
+                       m->mothurOut("fasta is a required parameter for the chimera.ccode command.");
+                       m->mothurOutEndLine();
+                       abort = true;
+               }
+               else if (fastafile == "not open") {  abort = true;  }
+               
+               //without an explicit output directory, use whatever path the fasta file name carries
+               outputDir = validParameter.validFile(parameters, "outputdir", false);
+               if (outputDir == "not found") {  outputDir = hasPath(fastafile);  }
+               
+               //template is required
+               templatefile = validParameter.validFile(parameters, "template", true);
+               if (templatefile == "not found") {
+                       templatefile = "";
+                       m->mothurOut("template is a required parameter for the chimera.ccode command.");
+                       m->mothurOutEndLine();
+                       abort = true;
+               }
+               else if (templatefile == "not open") {  abort = true;  }
+               
+               //mask is optional; the literal value "default" is kept as-is and not opened here
+               maskfile = validParameter.validFile(parameters, "mask", false);
+               if (maskfile == "not found") {  maskfile = "";  }
+               else if (maskfile != "default") {
+                       //a mask name without a path gets the input directory prepended, when one was given
+                       if ((inputDir != "") && (hasPath(maskfile) == "")) {  maskfile = inputDir + maskfile;  }
+                       
+                       //openInputFile returning 1 aborts the command
+                       ifstream maskStream;
+                       if (openInputFile(maskfile, maskStream) == 1) {  abort = true;  }
+                       maskStream.close();
+               }
+               
+               //optional settings; a missing one falls back to the default assigned below
+               string value = validParameter.validFile(parameters, "filter", false);
+               if (value == "not found") {  value = "F";  }
+               filter = isTrue(value);
+               
+               value = validParameter.validFile(parameters, "processors", false);
+               if (value == "not found") {  value = "1";  }
+               convert(value, processors);
+               
+               value = validParameter.validFile(parameters, "window", false);
+               if (value == "not found") {  value = "0";  }
+               convert(value, window);
+               
+               value = validParameter.validFile(parameters, "numwanted", false);
+               if (value == "not found") {  value = "20";  }
+               convert(value, numwanted);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+//Prints the usage text for the chimera.ccode command through m->mothurOut.
+//The MPI note is only compiled in when USE_MPI is defined.
+void ChimeraCcodeCommand::help(){
+       try {
+       
+               m->mothurOut("The chimera.ccode command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
+               m->mothurOut("This command was created using the algorithms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n");
+               m->mothurOut("The chimera.ccode command parameters are fasta, template, filter, mask, processors, window and numwanted.\n");
+               m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+               m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
+               m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
+               m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
+               #ifdef USE_MPI
+               m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
+               #endif
+               m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for your sequences. \n");
+               m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
+               m->mothurOut("The numwanted parameter allows you to specify how many sequences you would like each query sequence compared with.\n");
+               m->mothurOut("The chimera.ccode command should be in the following format: \n");
+               m->mothurOut("chimera.ccode(fasta=yourFastaFile, template=yourTemplate) \n");
+               //the example must name this command, not chimera.seqs
+               m->mothurOut("Example: chimera.ccode(fasta=AD.align, template=core_set_aligned.imputed.fasta) \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCcodeCommand", "help");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+ChimeraCcodeCommand::~ChimeraCcodeCommand(){
+       //intentionally empty
+}
+
+//***************************************************************************************************************
+
+int ChimeraCcodeCommand::execute(){
+       try{
+               
+               if (abort == true) { return 0; }
+               
+               int start = time(NULL); 
+               
+               //set user options
+               if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
+
+               chimera = new Ccode(fastafile, templatefile, filter, maskfile, window, numwanted, outputDir);   
+               
+               //is your template aligned?
+               if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; }
+               templateSeqsLength = chimera->getLength();
+               
+               string outputFileName, accnosFileName;
+               if (maskfile != "") {
+                       outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".ccode.chimeras";
+                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".ccode.accnos";
+               }else {
+                       outputFileName = outputDir + getRootName(getSimpleName(fastafile))  + "ccode.chimeras";
+                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile))  + "ccode.accnos";
+               }
+
+               string mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
+               bool hasAccnos = true;
+               
+               if (m->control_pressed) { delete chimera;       return 0;       }
+               
+       #ifdef USE_MPI
+       
+                       int pid, end, numSeqsPerProcessor; 
+                       int tag = 2001;
+                       vector<long> MPIPos;
+                       MPIWroteAccnos = false;
+                       
+                       MPI_Status status; 
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                       MPI_File inMPI;
+                       MPI_File outMPI;
+                       MPI_File outMPIAccnos;
+                       
+                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                       int inMode=MPI_MODE_RDONLY; 
+                                                       
+                       char outFilename[outputFileName.length()];
+                       strcpy(outFilename, outputFileName.c_str());
+                       
+                       char outAccnosFilename[accnosFileName.length()];
+                       strcpy(outAccnosFilename, accnosFileName.c_str());
+                       
+                       char inFileName[fastafile.length()];
+                       strcpy(inFileName, fastafile.c_str());
+
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+                       MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+               
+                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+               
+                       if (pid == 0) { //you are the root process 
+                               string outTemp = "For full window mapping info refer to " + mapInfo + "\n\n";
+                               
+                               //print header
+                               int length = outTemp.length();
+                               char buf2[length];
+                               strcpy(buf2, outTemp.c_str()); 
+                               MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
+                               
+                               MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+                               
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                       
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
+
+                               for (int i = 1; i < processors; i++) {
+                                       bool tempResult;
+                                       MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                       if (tempResult != 0) { MPIWroteAccnos = true; }
+                               }
+                       }else{ //you are a child process
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPIPos.resize(numSeqs+1);
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                               
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+                               
+                               MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                       }
+                       
+                       //close files 
+                       MPI_File_close(&inMPI);
+                       MPI_File_close(&outMPI);
+                       MPI_File_close(&outMPIAccnos);
+                       
+                       //delete accnos file if blank
+                       if (pid == 0) {
+                               if (!MPIWroteAccnos) { 
+                                       //MPI_Info info;
+                                       //MPI_File_delete(outAccnosFilename, info);
+                                       hasAccnos = false;      
+                                       remove(accnosFileName.c_str()); 
+                               }
+                       }
+                               
+       #else
+               ofstream outHeader;
+               string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + "ccode.chimeras.tempHeader";
+               openOutputFile(tempHeader, outHeader);
+               
+               outHeader << "For full window mapping info refer to " << mapInfo << endl << endl;
+
+               outHeader.close();
+               
+               //break up file
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
+                               
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastafile, accnosFileName);
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(tempHeader.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+                               
+                               //delete accnos file if its blank 
+                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+                                                               
+                       }else{
+                               vector<int> positions;
+                               processIDS.resize(0);
+                               
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               
+                               string input;
+                               while(!inFASTA.eof()){
+                                       input = getline(inFASTA);
+                                       if (input.length() != 0) {
+                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                       }
+                               }
+                               inFASTA.close();
+                               
+                               numSeqs = positions.size();
+                               
+                               int numSeqsPerProcessor = numSeqs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                               }
+                               
+                               
+                               createProcesses(outputFileName, fastafile, accnosFileName); 
+                       
+                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       
+                               //append output files
+                               for(int i=1;i<processors;i++){
+                                       appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                               }
+                               
+                               vector<string> nonBlankAccnosFiles;
+                               //delete blank accnos files generated with multiple processes
+                               for(int i=0;i<processors;i++){  
+                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+                                               nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+                                       }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
+                               }
+                               
+                               //append accnos files
+                               if (nonBlankAccnosFiles.size() != 0) { 
+                                       rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                                       
+                                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+                                               appendFiles(nonBlankAccnosFiles[h], accnosFileName);
+                                               remove(nonBlankAccnosFiles[h].c_str());
+                                       }
+                               }else{ hasAccnos = false;  }
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+
+                       }
+
+               #else
+                       ifstream inFASTA;
+                       openInputFile(candidateFileNames[s], inFASTA);
+                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       inFASTA.close();
+                       lines.push_back(new linePair(0, numSeqs));
+                       
+                       driver(lines[0], outputFileName, fastafile, accnosFileName);
+                       
+                       if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(tempHeader.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                       }
+                       
+                       //delete accnos file if its blank 
+                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+               #endif
+               
+               //m->mothurOut("Output File Names: ");
+               //if ((filter) && (method == "bellerophon")) { m->mothurOut(
+               //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
+               //      else                             { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
+       
+               appendFiles(outputFileName, tempHeader);
+       
+               remove(outputFileName.c_str());
+               rename(tempHeader.c_str(), outputFileName.c_str());
+       #endif
+       
+               delete chimera;
+               
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
+               m->mothurOutEndLine();
+
+               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+               
+               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCcodeCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ChimeraCcodeCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+       try {
+               ofstream out;
+               openOutputFile(outputFName, out);
+               
+               ofstream out2;
+               openOutputFile(accnos, out2);
+               
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
+
+               inFASTA.seekg(line->start);
+               
+               for(int i=0;i<line->numSeqs;i++){
+               
+                       if (m->control_pressed) {       return 1;       }
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               
+                               if (candidateSeq->getAligned().length() != templateSeqsLength) {  
+                                       m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
+                               }else{
+                                       //find chimeras
+                                       chimera->getChimeras(candidateSeq);
+                                       
+                                       if (m->control_pressed) {       delete candidateSeq; return 1;  }
+               
+                                       //print results
+                                       chimera->print(out, out2);
+                               }
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+               }
+               //report progress
+               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               
+               out.close();
+               out2.close();
+               inFASTA.close();
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCcodeCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+#ifdef USE_MPI
+int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<long>& MPIPos){
+       try {
+                               
+               MPI_Status status; 
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               for(int i=0;i<num;i++){
+               
+                       if (m->control_pressed) { return 0; }
+                       
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+       
+                       char buf4[length];
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
+                       
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+
+                       Sequence* candidateSeq = new Sequence(iss);  gobble(iss);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               
+                               if (candidateSeq->getAligned().length() != templateSeqsLength) {  
+                                       m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
+                               }else{
+                                       //find chimeras
+                                       chimera->getChimeras(candidateSeq);
+                                       
+                                       if (m->control_pressed) {       delete candidateSeq; return 1;  }
+               
+                                       //print results
+                                       bool isChimeric = chimera->print(outMPI, outAccMPI);
+                                       if (isChimeric) { MPIWroteAccnos = true;  }
+                               }
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
+               }
+               //report progress
+               if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
+               
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCcodeCommand", "driverMPI");
+               exit(1);
+       }
+}
+#endif
+
+/**************************************************************************************************/
+
+int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename, string accnos) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               //              processIDS.resize(0);
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               return 0;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCcodeCommand", "createProcesses");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
diff --git a/chimeraccodecommand.h b/chimeraccodecommand.h
new file mode 100644 (file)
index 0000000..9a0efb9
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef CHIMERACCODECOMMAND_H
+#define CHIMERACCODECOMMAND_H
+
+/*
+ *  chimeraccodecommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 3/30/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "chimera.h"
+
+
+/***********************************************************/
+
+class ChimeraCcodeCommand : public Command {
+public:
+       ChimeraCcodeCommand(string);
+       ~ChimeraCcodeCommand();
+       int execute();
+       void help();
+       
+               
+private:
+
+       struct linePair {
+               int start;
+               int numSeqs;
+               linePair(long int i, int j) : start(i), numSeqs(j) {}
+       };
+       vector<int> processIDS;   //processid
+       vector<linePair*> lines;
+       
+       int driver(linePair*, string, string, string);
+       int createProcesses(string, string, string);
+       
+       #ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<long>&);
+       #endif
+
+       bool abort, filter, MPIWroteAccnos;
+       string fastafile, templatefile, outputDir, maskfile;
+       int processors, window, numwanted, numSeqs, templateSeqsLength;
+       Chimera* chimera;
+       
+       
+};
+
+/***********************************************************/
+
+#endif
+
diff --git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp
new file mode 100644 (file)
index 0000000..ba5e5be
--- /dev/null
@@ -0,0 +1,464 @@
+/*
+ *  chimeracheckcommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 3/31/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimeracheckcommand.h"
+#include "chimeracheckrdp.h"
+
+//***************************************************************************************************************
+
+ChimeraCheckCommand::ChimeraCheckCommand(string option)  {
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"fasta","processors","increment","template","ksize","svg", "name","outputdir","inputdir" };
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("template");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["template"] = inputDir + it->second;         }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                       }
+
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true;  }      
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true;  }    
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }
+                       else if (namefile == "not found") { namefile = "";  }
+
+                       string temp = validParameter.validFile(parameters, "processors", false);                if (temp == "not found") { temp = "1"; }
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
+                       convert(temp, ksize);
+                       
+                       temp = validParameter.validFile(parameters, "svg", false);                              if (temp == "not found") { temp = "F"; }
+                       svg = isTrue(temp);
+                       
+                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "10"; }
+                       convert(temp, increment);                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckCommand", "ChimeraCheckCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void ChimeraCheckCommand::help(){
+       try {
+       
+               m->mothurOut("The chimera.check command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
+               m->mothurOut("This command was created using the algorythms described in CHIMERA_CHECK version 2.7 written by Niels Larsen. \n");
+               m->mothurOut("The chimera.check command parameters are fasta, template, processors, ksize, increment, svg and name.\n");
+               m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+               m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
+               m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
+               #ifdef USE_MPI
+               m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
+               #endif
+               m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default is 10.\n");
+               m->mothurOut("The ksize parameter allows you to input kmersize, default is 7. \n");
+               m->mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n");
+               m->mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
+               m->mothurOut("The chimera.check command should be in the following format: \n");
+               m->mothurOut("chimera.check(fasta=yourFastaFile, template=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n");
+               m->mothurOut("Example: chimera.check(fasta=AD.fasta, template=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckCommand", "help");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+ChimeraCheckCommand::~ChimeraCheckCommand(){
+       //intentionally empty: the destructor performs no work
+}
+
+//***************************************************************************************************************
+
+int ChimeraCheckCommand::execute(){
+       try{
+               
+               if (abort == true) { return 0; }
+               
+               int start = time(NULL); 
+               
+               chimera = new ChimeraCheckRDP(fastafile, templatefile, namefile, svg, increment, ksize, outputDir);                     
+
+               if (m->control_pressed) { delete chimera;       return 0;       }
+               
+               string outputFileName = outputDir + getRootName(getSimpleName(fastafile))  + "chimeracheck.chimeras";
+               
+       #ifdef USE_MPI
+       
+                       int pid, end, numSeqsPerProcessor; 
+                       int tag = 2001;
+                       vector<long> MPIPos;
+                       
+                       MPI_Status status; 
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                       MPI_File inMPI;
+                       MPI_File outMPI;
+                                               
+                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                       int inMode=MPI_MODE_RDONLY; 
+                                                       
+                       char outFilename[outputFileName.length()];
+                       strcpy(outFilename, outputFileName.c_str());
+                       
+                       char inFileName[fastafile.length()];
+                       strcpy(inFileName, fastafile.c_str());
+
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+                       
+                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  delete chimera; return 0;  }
+                       
+                       if (pid == 0) { //you are the root process 
+                               MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+                               
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                       
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  remove(outputFileName.c_str());  delete chimera; return 0;  }
+                               
+                               //wait on chidren
+                               for(int i = 1; i < processors; i++) { 
+                                       char buf[4];
+                                       MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
+                               }
+                       }else{ //you are a child process
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPIPos.resize(numSeqs+1);
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                               
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   delete chimera; return 0;  }
+                               
+                               //tell parent you are done.
+                               char buf[4];
+                               strcpy(buf, "done"); 
+                               MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
+                       }
+                       
+                       //close files 
+                       MPI_File_close(&inMPI);
+                       MPI_File_close(&outMPI);
+       #else
+               
+               //break up file
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
+                               
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastafile);
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+                                                               
+                       }else{
+                               vector<int> positions;
+                               processIDS.resize(0);
+                               
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               
+                               string input;
+                               while(!inFASTA.eof()){
+                                       input = getline(inFASTA);
+                                       if (input.length() != 0) {
+                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                       }
+                               }
+                               inFASTA.close();
+                               
+                               numSeqs = positions.size();
+                               
+                               int numSeqsPerProcessor = numSeqs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                               }
+                               
+                               
+                               createProcesses(outputFileName, fastafile); 
+                       
+                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       
+                               //append output files
+                               for(int i=1;i<processors;i++){
+                                       appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                               }
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+                       }
+
+               #else
+                       ifstream inFASTA;
+                       openInputFile(candidateFileNames[s], inFASTA);
+                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       inFASTA.close();
+                       lines.push_back(new linePair(0, numSeqs));
+                       
+                       driver(lines[0], outputFileName, fastafile);
+                       
+                       if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                       }
+               #endif
+       #endif          
+               delete chimera;
+               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+               
+               m->mothurOutEndLine(); m->mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine(); 
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               m->mothurOutEndLine();
+               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ChimeraCheckCommand::driver(linePair* line, string outputFName, string filename){
+       try {
+               ofstream out;
+               openOutputFile(outputFName, out);
+               
+               ofstream out2;
+               
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
+
+               inFASTA.seekg(line->start);
+               
+               for(int i=0;i<line->numSeqs;i++){
+               
+                       if (m->control_pressed) {       return 1;       }
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               //find chimeras
+                               chimera->getChimeras(candidateSeq);
+                               
+                               if (m->control_pressed) {       delete candidateSeq; return 1;  }
+       
+                               //print results
+                               chimera->print(out, out2);
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+               }
+               //report progress
+               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               
+               out.close();
+               inFASTA.close();
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+#ifdef USE_MPI
+int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector<long>& MPIPos){
+       try {
+               MPI_File outAccMPI;
+               MPI_Status status; 
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               for(int i=0;i<num;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+       
+                       char buf4[length];
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
+                       
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+
+                       Sequence* candidateSeq = new Sequence(iss);  gobble(iss);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               //find chimeras
+                               chimera->getChimeras(candidateSeq);
+                                       
+                               //print results
+                               chimera->print(outMPI, outAccMPI);
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
+               }
+               //report progress
+               if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckCommand", "driverMPI");
+               exit(1);
+       }
+}
+#endif
+
+/**************************************************************************************************/
+
+int ChimeraCheckCommand::createProcesses(string outputFileName, string filename) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               //              processIDS.resize(0);
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               return 0;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckCommand", "createProcesses");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+
diff --git a/chimeracheckcommand.h b/chimeracheckcommand.h
new file mode 100644 (file)
index 0000000..6db61bd
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef CHIMERACHECKCOMMAND_H
+#define CHIMERACHECKCOMMAND_H
+
+/*
+ *  chimeracheckcommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 3/31/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "chimera.h"
+
+
+/***********************************************************/
+
+class ChimeraCheckCommand : public Command {
+public:
+       ChimeraCheckCommand(string);
+       ~ChimeraCheckCommand();
+       int execute();
+       void help();
+       
+               
+private:
+
+       struct linePair {
+               int start;
+               int numSeqs;
+               linePair(long int i, int j) : start(i), numSeqs(j) {}
+       };
+       vector<int> processIDS;   //processid
+       vector<linePair*> lines;
+       
+       int driver(linePair*, string, string);
+       int createProcesses(string, string);
+               
+       #ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, vector<long>&);
+       #endif
+
+       bool abort, svg;
+       string fastafile, templatefile, namefile, outputDir;
+       int processors, increment, ksize, numSeqs, templateSeqsLength;
+       Chimera* chimera;
+       
+       
+};
+
+/***********************************************************/
+
+#endif
+
+
index 790d3ebe01100c1d615be9d189e73d6fa03f28ef..51a3d9bbac0eaea545e1626682049db382a973ac 100644 (file)
 #include "chimeracheckrdp.h"
                
 //***************************************************************************************************************
-ChimeraCheckRDP::ChimeraCheckRDP(string filename,  string o) {  fastafile = filename;  outputDir = o;  }
+ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : Chimera() { 
+       try {
+               fastafile = filename; 
+               templateFileName = temp;  
+               name = n;
+               svg = s;
+               increment = inc;
+               kmerSize = k;
+               outputDir = o; 
+               
+               templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
+               m->mothurOutEndLine();
+               
+               kmer = new Kmer(kmerSize);
+               
+               if (name != "") { 
+                       readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
 
 ChimeraCheckRDP::~ChimeraCheckRDP() {
@@ -19,7 +42,7 @@ ChimeraCheckRDP::~ChimeraCheckRDP() {
                delete kmer;
        }
        catch(exception& e) {
-               m->errorOut(e, "ChimeraCheckRDP", "~AlignSim");
+               m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP");
                exit(1);
        }
 }      
@@ -56,25 +79,49 @@ int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
                exit(1);
        }
 }
+#ifdef USE_MPI
 //***************************************************************************************************************
-int ChimeraCheckRDP::doPrep() {
+int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) {
        try {
-               templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
-               m->mothurOutEndLine();
                
-               kmer = new Kmer(kmerSize);
+               cout << "Processing: " << querySeq->getName() << endl; 
                
-               if (name != "") { 
-                       readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
+               string outString = "";
+               
+               outString += querySeq->getName() + "\nIS scores: \t";
+                       
+               for (int k = 0; k < IS.size(); k++) {
+                       outString += toString(IS[k].score)  + "\t"; 
+               }
+               outString += "\n";
+               
+               MPI_Status status;
+               int length = outString.length();
+               char buf[length];
+               strcpy(buf, outString.c_str()); 
+                               
+               MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
+               
+               if (svg) {
+                       if (name != "") { //if user has specific names
+                               map<string, string>::iterator it = names.find(querySeq->getName());
+                               
+                               if (it != names.end()) { //user wants pic of this
+                                       makeSVGpic(IS);  //zeros out negative results
+                               }
+                       }else{//output them all
+                               makeSVGpic(IS);  //zeros out negative results
+                       }
                }
                
                return 0;
        }
        catch(exception& e) {
-               m->errorOut(e, "ChimeraCheckRDP", "doPrep");
+               m->errorOut(e, "ChimeraCheckRDP", "print");
                exit(1);
        }
 }
+#endif
 //***************************************************************************************************************
 int ChimeraCheckRDP::getChimeras(Sequence* query) {
        try {
@@ -123,6 +170,8 @@ vector<sim> ChimeraCheckRDP::findIS() {
                        
                //for each window
                for (int f = start; f < (seq.length() - start); f+=increment) {
+               
+                       if (m->control_pressed) { return isValues; }
                        
                        if ((f - kmerSize) < 0)  { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
                        
@@ -204,18 +253,47 @@ vector<sim> ChimeraCheckRDP::findIS() {
 //***************************************************************************************************************
 void ChimeraCheckRDP::readName(string namefile) {
        try{
-               ifstream in;
-               openInputFile(namefile, in);
+       
                string name;
+
+       #ifdef USE_MPI
+               
+               MPI_File inMPI;
+               MPI_Offset size;
+               MPI_Status status;
                
+               char inFileName[namefile.length()];
+               strcpy(inFileName, namefile.c_str());
+
+               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
+               MPI_File_get_size(inMPI, &size);
+
+               char buffer[size];
+               MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
+
+               string tempBuf = buffer;
+               if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
+               istringstream iss (tempBuf,istringstream::in);
+               
+               while(!iss.eof()) {
+                       iss >> name; gobble(iss);
+                       names[name] = name;
+               }
+       
+               MPI_File_close(&inMPI);
+               
+       #else   
+       
+               ifstream in;
+               openInputFile(namefile, in);
+                               
                while (!in.eof()) {
-                       
-                       in >> name;
-                       
+                       in >> name; gobble(in);
                        names[name] = name;
-                       
-                       gobble(in);
                }
+               in.close();
+       
+       #endif
        
        }
        catch(exception& e) {
@@ -260,7 +338,80 @@ int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
                exit(1);
        }
 }
+#ifdef USE_MPI
+//***************************************************************************************************************
+void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
+       try{
+               
+               string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
+               
+               MPI_File outSVG;
+               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+               
+               char FileName[file.length()];
+               strcpy(FileName, file.c_str());
+               
+               MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG);  //comm, filename, mode, info, filepointer
+               
+               int width = (info.size()*5) + 150;
+               
+               string outString = "";
+               
+               outString += "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
+               outString += "<g>\n";
+               outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
+               
+               outString +=  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
+               outString +=  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
+               
+               outString += "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
+               outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
+               outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
+               
+               outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
+               
+               outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
+               
+               
+               //find max is score
+               float biggest = 0.0;
+               for (int i = 0; i < info.size(); i++) {
+                       if (info[i].score > biggest)  {
+                               biggest = info[i].score;
+                       }
+               }
+               
+               outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
+               
+               int scaler2 = 500 / biggest;
+               
+               
+               outString += "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
+               //160,200 180,230 200,210 234,220\"/> "; 
+               for (int i = 0; i < info.size(); i++) {
+                       if(info[i].score < 0) { info[i].score = 0; }
+                       outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " ";
+               }
+               
+               outString += "\"/> ";
+               outString += "</g>\n</svg>\n";
+               
+               MPI_Status status;
+               int length = outString.length();
+               char buf2[length];
+               strcpy(buf2, outString.c_str()); 
+                               
+               MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status);
+               
+               MPI_File_close(&outSVG);
 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
+               exit(1);
+       }
+}
+#else
 //***************************************************************************************************************
 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
        try{
@@ -318,6 +469,7 @@ void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
                exit(1);
        }
 }
-//***************************************************************************************************************
+#endif
+//***************************************************************************************************************/
 
 
index f54faab045bf65f0c71797dd64ce5e25c045d0ca..f17c7b16d9124074548c391300a0e7d99d2c07b8 100644 (file)
 class ChimeraCheckRDP : public Chimera {
        
        public:
-               ChimeraCheckRDP(string, string);        
+               ChimeraCheckRDP(string, string, string, bool, int, int, string); //fasta, template, name, svg, increment, ksize, outputDir      
                ~ChimeraCheckRDP();
                
                int getChimeras(Sequence*);
                int print(ostream&, ostream&);
-               int doPrep();
                
-       private:
+               #ifdef USE_MPI
+               int print(MPI_File&, MPI_File&);
+               #endif
                
+       private:
                
                Sequence* querySeq;
                AlignmentDB* templateDB;
@@ -43,6 +45,9 @@ class ChimeraCheckRDP : public Chimera {
                vector<sim>  IS;  //IS is the vector of IS values for each window for query
                string fastafile;
                map<string, string> names;
+               string name;
+               bool svg;
+               int kmerSize, increment;
                
                vector<sim> findIS();
                int calcKmers(map<int, int>, map<int, int>);
diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp
new file mode 100644 (file)
index 0000000..919285d
--- /dev/null
@@ -0,0 +1,573 @@
+/*
+ *  chimerapintailcommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 4/1/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimerapintailcommand.h"
+#include "pintail.h"
+
+//***************************************************************************************************************
+
+ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"fasta","filter","processors","window" "increment","template","conservation","quantile","mask","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("template");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["template"] = inputDir + it->second;         }
+                               }
+                               
+                               it = parameters.find("conservation");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("quantile");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
+                               }
+                       }
+
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }    
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
+                       
+                       consfile = validParameter.validFile(parameters, "conservation", true);
+                       if (consfile == "not open") { abort = true; }
+                       else if (consfile == "not found") { consfile = "";  }   
+                       
+                       quanfile = validParameter.validFile(parameters, "quantile", true);
+                       if (quanfile == "not open") { abort = true; }
+                       else if (quanfile == "not found") { quanfile = "";  }
+                       
+                       maskfile = validParameter.validFile(parameters, "mask", false);
+                       if (maskfile == "not found") { maskfile = "";  }        
+                       else if (maskfile != "default")  { 
+                               if (inputDir != "") {
+                                       string path = hasPath(maskfile);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       maskfile = inputDir + maskfile;         }
+                               }
+
+                               ifstream in;
+                               int     ableToOpen = openInputFile(maskfile, in);
+                               if (ableToOpen == 1) { abort = true; }
+                               in.close();
+                       }
+                                               
+                       string temp;
+                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
+                       filter = isTrue(temp);
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       convert(temp, window);
+                       
+                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
+                       convert(temp, increment);
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void ChimeraPintailCommand::help(){
+       try {
+       
+               m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
+               m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
+               m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
+               m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+               m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
+               m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
+               m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
+               m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
+               #ifdef USE_MPI
+               m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
+               #endif
+               m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=1/4 sequence length. \n");
+               m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
+               m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
+               m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
+               m->mothurOut("The chimera.pintail command should be in the following format: \n");
+               m->mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
+               m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPintailCommand", "help");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+// Intentionally empty: the destructor performs no cleanup of its own.
+ChimeraPintailCommand::~ChimeraPintailCommand(){
+}
+
+//***************************************************************************************************************
+
+int ChimeraPintailCommand::execute(){
+       try{
+               
+               if (abort == true) { return 0; }
+               
+               int start = time(NULL); 
+               
+               chimera = new Pintail(fastafile, templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
+               
+               //set user options
+               if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
+               
+
+               string outputFileName, accnosFileName;
+               if (maskfile != "") {
+                       outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.chimeras";
+                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.accnos";
+               }else {
+                       outputFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.chimeras";
+                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.accnos";
+               }
+               bool hasAccnos = true;
+               
+               if (m->control_pressed) { delete chimera;       return 0;       }
+               
+               if (chimera->getUnaligned()) { 
+                       m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
+                       delete chimera;
+                       return 0; 
+               }
+               templateSeqsLength = chimera->getLength();
+       
+       #ifdef USE_MPI
+               int pid, end, numSeqsPerProcessor; 
+                       int tag = 2001;
+                       vector<long> MPIPos;
+                       MPIWroteAccnos = false;
+                       
+                       MPI_Status status; 
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                       MPI_File inMPI;
+                       MPI_File outMPI;
+                       MPI_File outMPIAccnos;
+                       
+                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                       int inMode=MPI_MODE_RDONLY; 
+                                                       
+                       char outFilename[outputFileName.length()];
+                       strcpy(outFilename, outputFileName.c_str());
+                       
+                       char outAccnosFilename[accnosFileName.length()];
+                       strcpy(outAccnosFilename, accnosFileName.c_str());
+                       
+                       char inFileName[fastafile.length()];
+                       strcpy(inFileName, fastafile.c_str());
+
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+                       MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+                       
+                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+
+                       if (pid == 0) { //you are the root process 
+                                                       
+                               MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+                               
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                       
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
+                               
+                               for (int i = 1; i < processors; i++) {
+                                       bool tempResult;
+                                       MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                       if (tempResult != 0) { MPIWroteAccnos = true; }
+                               }
+                       }else{ //you are a child process
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPIPos.resize(numSeqs+1);
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                               
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+
+                               MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                       }
+                       
+                       //close files 
+                       MPI_File_close(&inMPI);
+                       MPI_File_close(&outMPI);
+                       MPI_File_close(&outMPIAccnos);
+                       
+                       //delete accnos file if blank
+                       if (pid == 0) {
+                               if (!MPIWroteAccnos) { 
+                                       //MPI_Info info;
+                                       //MPI_File_delete(outAccnosFilename, info);
+                                       hasAccnos = false;      
+                                       remove(accnosFileName.c_str()); 
+                               }
+                       }
+
+       #else
+       
+               //break up file
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
+                               
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastafile, accnosFileName);
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+                               
+                               //delete accnos file if its blank 
+                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+                                                               
+                       }else{
+                               vector<int> positions;
+                               processIDS.resize(0);
+                               
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               
+                               string input;
+                               while(!inFASTA.eof()){
+                                       input = getline(inFASTA);
+                                       if (input.length() != 0) {
+                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                       }
+                               }
+                               inFASTA.close();
+                               
+                               numSeqs = positions.size();
+                               
+                               int numSeqsPerProcessor = numSeqs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                               }
+                               
+                               
+                               createProcesses(outputFileName, fastafile, accnosFileName); 
+                       
+                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       
+                               //append output files
+                               for(int i=1;i<processors;i++){
+                                       appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                               }
+                               
+                               vector<string> nonBlankAccnosFiles;
+                               //delete blank accnos files generated with multiple processes
+                               for(int i=0;i<processors;i++){  
+                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+                                               nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+                                       }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
+                               }
+                               
+                               //append accnos files
+                               if (nonBlankAccnosFiles.size() != 0) { 
+                                       rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                                       
+                                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+                                               appendFiles(nonBlankAccnosFiles[h], accnosFileName);
+                                               remove(nonBlankAccnosFiles[h].c_str());
+                                       }
+                               }else{ hasAccnos = false;  }
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+                       }
+
+               #else
+                       ifstream inFASTA;
+                       openInputFile(candidateFileNames[s], inFASTA);
+                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       inFASTA.close();
+                       lines.push_back(new linePair(0, numSeqs));
+                       
+                       driver(lines[0], outputFileName, fastafile, accnosFileName);
+                       
+                       if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                       }
+                       
+                       //delete accnos file if its blank 
+                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+               #endif
+               
+       #endif  
+       
+               delete chimera;
+               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
+               m->mothurOutEndLine();
+               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPintailCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ChimeraPintailCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+       try {
+               ofstream out;
+               openOutputFile(outputFName, out);
+               
+               ofstream out2;
+               openOutputFile(accnos, out2);
+               
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
+
+               inFASTA.seekg(line->start);
+               
+               for(int i=0;i<line->numSeqs;i++){
+               
+                       if (m->control_pressed) {       return 1;       }
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               
+                               if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
+                                       m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
+                               }else{
+                                       //find chimeras
+                                       chimera->getChimeras(candidateSeq);
+                                       
+                                       if (m->control_pressed) {       delete candidateSeq; return 1;  }
+               
+                                       //print results
+                                       chimera->print(out, out2);
+                               }
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+               }
+               //report progress
+               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               
+               out.close();
+               out2.close();
+               inFASTA.close();
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPintailCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+#ifdef USE_MPI
+int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<long>& MPIPos){
+       try {
+                               
+               MPI_Status status; 
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               for(int i=0;i<num;i++){
+                       
+                       if (m->control_pressed) {       return 1;       }
+                       
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+       
+                       char buf4[length];
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
+                       
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+
+                       Sequence* candidateSeq = new Sequence(iss);  gobble(iss);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               
+                               if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
+                                       m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
+                               }else{
+                                       //find chimeras
+                                       chimera->getChimeras(candidateSeq);
+                                       
+                                       if (m->control_pressed) {       delete candidateSeq; return 1;  }
+               
+                                       //print results
+                                       bool isChimeric = chimera->print(outMPI, outAccMPI);
+                                       if (isChimeric) { MPIWroteAccnos = true;  }
+                               }
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
+               }
+               //report progress
+               if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
+               
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
+               exit(1);
+       }
+}
+#endif
+
+/**************************************************************************************************/
+
+int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               //              processIDS.resize(0);
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               return 0;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+
diff --git a/chimerapintailcommand.h b/chimerapintailcommand.h
new file mode 100644 (file)
index 0000000..0ddfc1c
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef CHIMERAPINTAILCOMMAND_H
+#define CHIMERAPINTAILCOMMAND_H
+
+/*
+ *  chimerapintailcommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 4/1/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "chimera.h"
+
+
+/***********************************************************/
+
+class ChimeraPintailCommand : public Command {
+
+public:
+
+       ChimeraPintailCommand(string);
+       ~ChimeraPintailCommand();
+       int execute();
+       void help();
+       
+private:
+
+       struct linePair {
+               int start;
+               int numSeqs;
+               linePair(long int i, int j) : start(i), numSeqs(j) {}
+       };
+       vector<int> processIDS;   //processid
+       vector<linePair*> lines;
+       
+       int driver(linePair*, string, string, string);
+       int createProcesses(string, string, string);
+       
+       #ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<long>&);
+       #endif
+
+       bool abort, filter, MPIWroteAccnos;
+       string fastafile, templatefile, consfile, quanfile, maskfile, outputDir;
+       int processors, window, increment, numSeqs, templateSeqsLength;
+       Chimera* chimera;
+       
+       
+};
+
+/***********************************************************/
+
+#endif
+
+
index 65e082b93a6a97e162af3aca3aa576915f22bd18..663b8940592f1f4ecfc8dce5ae81beadef8bef45 100644 (file)
  */
 
 #include "chimeraseqscommand.h"
-#include "bellerophon.h"
-#include "pintail.h"
-#include "ccode.h"
-#include "chimeracheckrdp.h"
-#include "chimeraslayer.h"
-
 
 //***************************************************************************************************************
 
-ChimeraSeqsCommand::ChimeraSeqsCommand(string option)  {
-       try {
-               abort = false;
-               
-               //allow user to run help
-               if(option == "help") { help(); abort = true; }
-               
-               else {
-                       //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", 
-                       "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
-                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-                       
-                       OptionParser parser(option);
-                       map<string,string> parameters = parser.getParameters();
-                       
-                       ValidParameters validParameter;
-                       map<string,string>::iterator it;
-                       
-                       //check to make sure all parameters are valid for command
-                       for (it = parameters.begin(); it != parameters.end(); it++) { 
-                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
-                       }
-                       
-                       //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
-                       if (inputDir == "not found"){   inputDir = "";          }
-                       else {
-                               string path;
-                               it = parameters.find("fasta");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
-                               }
-                               
-                               it = parameters.find("template");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["template"] = inputDir + it->second;         }
-                               }
-                               
-                               it = parameters.find("conservation");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
-                               }
-                               
-                               it = parameters.find("quantile");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
-                               }
-                               
-                               it = parameters.find("name");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
-                               }
-                       }
-
-                       
-                       //check for required parameters
-                       fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
-                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.seqs command."); m->mothurOutEndLine(); abort = true;  }       
-                       
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
-                               outputDir = ""; 
-                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
-                       }
-
-                       templatefile = validParameter.validFile(parameters, "template", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  }   
-                       
-                       consfile = validParameter.validFile(parameters, "conservation", true);
-                       if (consfile == "not open") { abort = true; }
-                       else if (consfile == "not found") { consfile = "";  }   
-                       
-                       quanfile = validParameter.validFile(parameters, "quantile", true);
-                       if (quanfile == "not open") { abort = true; }
-                       else if (quanfile == "not found") { quanfile = "";  }
-                       
-                       namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
-                       else if (namefile == "not found") { namefile = "";  }
-
-                       maskfile = validParameter.validFile(parameters, "mask", false);
-                       if (maskfile == "not found") { maskfile = "";  }        
-                       else if (maskfile != "default")  { 
-                               if (inputDir != "") {
-                                       string path = hasPath(maskfile);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       maskfile = inputDir + maskfile;         }
-                               }
-
-                               ifstream in;
-                               int     ableToOpen = openInputFile(maskfile, in);
-                               if (ableToOpen == 1) { abort = true; }
-                               in.close();
-                       }
-                       
-                       method = validParameter.validFile(parameters, "method", false);                 if (method == "not found") { method = "pintail"; }
-                       
-                       string temp;
-                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
-                       filter = isTrue(temp);
-                       
-                       temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
-                       correction = isTrue(temp);
-                       
-                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
-                       convert(temp, processors);
-                       
-                       temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
-                       convert(temp, ksize);
-                       
-                       temp = validParameter.validFile(parameters, "svg", false);                              if (temp == "not found") { temp = "F"; }
-                       svg = isTrue(temp);
-                       
-                       temp = validParameter.validFile(parameters, "window", false);   
-                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "50"; }                      
-                       else if (temp == "not found") { temp = "0"; }
-                       convert(temp, window);
-                       
-                       temp = validParameter.validFile(parameters, "match", false);                    if (temp == "not found") { temp = "5"; }
-                       convert(temp, match);
-                       
-                       temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
-                       convert(temp, mismatch);
-                       
-                       temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.007"; }
-                       convert(temp, divR);
-                       
-                       temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
-                       convert(temp, minSimilarity);
-                       
-                       temp = validParameter.validFile(parameters, "mincov", false);                   if (temp == "not found") { temp = "70"; }
-                       convert(temp, minCoverage);
-                       
-                       temp = validParameter.validFile(parameters, "minbs", false);                    if (temp == "not found") { temp = "90"; }
-                       convert(temp, minBS);
-                       
-                       temp = validParameter.validFile(parameters, "minsnp", false);                   if (temp == "not found") { temp = "10"; }
-                       convert(temp, minSNP);
-
-                       temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
-                       convert(temp, parents); 
-                       
-                       temp = validParameter.validFile(parameters, "realign", false);                  if (temp == "not found") { temp = "f"; }
-                       realign = isTrue(temp); 
-                       
-                       search = validParameter.validFile(parameters, "search", false);                 if (search == "not found") { search = "distance"; }
-                       
-                       temp = validParameter.validFile(parameters, "iters", false);    
-                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }             
-                       else if (temp == "not found") { temp = "1000"; }
-                       convert(temp, iters); 
-                        
-                       temp = validParameter.validFile(parameters, "increment", false);                
-                       if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
-                       else if ((temp == "not found") && (method == "chimeraslayer")) { temp = "5"; }
-                       else if (temp == "not found") { temp = "25"; }
-                       convert(temp, increment);
-                       
-                       temp = validParameter.validFile(parameters, "numwanted", false);
-                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "15"; }              
-                       else if (temp == "not found") { temp = "20"; }
-                       convert(temp, numwanted);
-
-                       if ((search != "distance") && (search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
-                       
-                       if (((method != "bellerophon")) && (templatefile == "")) { m->mothurOut("You must provide a template file with the pintail, ccode, chimeraslayer or chimeracheck methods."); m->mothurOutEndLine(); abort = true;  }
-                       
-
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
-               exit(1);
-       }
-}
+ChimeraSeqsCommand::ChimeraSeqsCommand(string option)  {}
 //**********************************************************************************************************************
 
-void ChimeraSeqsCommand::help(){
-       try {
-       
-               //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
-               //m->mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
-               m->mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
-               m->mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters, search, realign.\n");
-               m->mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
-               m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
-               m->mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
-               m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
-               m->mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is pintail. Options include bellerophon, ccode and chimeracheck \n");
-               m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
-               m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
-               m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n");
-               m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n");
-               m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
-               m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n");
-               m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
-               m->mothurOut("The ksize parameter allows you to input kmersize. \n");
-               m->mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
-               m->mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
-               m->mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
-               m->mothurOut("The minsim parameter allows you .... \n");
-               m->mothurOut("The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n");
-               m->mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n");
-               m->mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
-               m->mothurOut("The search parameter allows you to specify search method for finding the closest parent. Choices are distance, blast, and kmer, default distance.  -used only by chimeraslayer. \n");
-               m->mothurOut("The realign parameter allows you to realign the query to the potential paretns. Choices are true or false, default false.  -used only by chimeraslayer. \n");
-               m->mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
-               m->mothurOut("Details for each method: \n"); 
-               m->mothurOut("\tpintail: \n"); 
-               m->mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n"); 
-               m->mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n");
-               m->mothurOut("\tbellerophon: \n"); 
-               m->mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n"); 
-               m->mothurOut("\tccode: \n"); 
-               m->mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n"); 
-               m->mothurOut("\tchimeracheck: \n"); 
-               m->mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n"); 
-               m->mothurOut("\tchimeraslayer: \n"); 
-               m->mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n"); 
-               m->mothurOut("The chimera.seqs command should be in the following format: \n");
-               m->mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
-               m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSeqsCommand", "help");
-               exit(1);
-       }
-}
+void ChimeraSeqsCommand::help(){}
 
 //***************************************************************************************************************
 
@@ -272,365 +23,11 @@ ChimeraSeqsCommand::~ChimeraSeqsCommand(){        /*      do nothing      */      }
 //***************************************************************************************************************
 
 int ChimeraSeqsCommand::execute(){
-       try{
-               
-               if (abort == true) { return 0; }
-               
-               int start = time(NULL); 
-               
-               if (method == "bellerophon")                    {               chimera = new Bellerophon(fastafile, outputDir);                        }
-               else if (method == "pintail")                   {               chimera = new Pintail(fastafile, outputDir);                            }
-               else if (method == "ccode")                             {               chimera = new Ccode(fastafile, outputDir);                                      }
-               else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, outputDir);            }
-               else if (method == "chimeraslayer")             {               chimera = new ChimeraSlayer(search, realign, fastafile);        }
-               else { m->mothurOut("Not a valid method."); m->mothurOutEndLine(); return 0;            }
-               
-               //set user options
-               if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
-               
-               chimera->setCons(consfile);     
-               chimera->setQuantiles(quanfile);                                
-               chimera->setMask(maskfile);
-               chimera->setFilter(filter);
-               chimera->setCorrection(correction);
-               chimera->setProcessors(processors);
-               chimera->setWindow(window);
-               chimera->setIncrement(increment);
-               chimera->setNumWanted(numwanted);
-               chimera->setKmerSize(ksize);
-               chimera->setSVG(svg);
-               chimera->setName(namefile);
-               chimera->setMatch(match);
-               chimera->setMisMatch(mismatch);
-               chimera->setDivR(divR);
-               chimera->setParents(parents);
-               chimera->setMinSim(minSimilarity);
-               chimera->setMinCoverage(minCoverage);
-               chimera->setMinBS(minBS);
-               chimera->setMinSNP(minSNP);
-               chimera->setIters(iters);
-               
-
-               string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
-               string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".accnos";
-               bool hasAccnos = true;
-               
-               if (method == "bellerophon") {//run bellerophon separately since you need to read entire fastafile to run it
-                       chimera->getChimeras();
-                       
-                       if (m->control_pressed) { delete chimera;       return 0;       }
-                       
-                       ofstream out;
-                       openOutputFile(outputFileName, out);
-                       
-                       ofstream out2;
-                       openOutputFile(accnosFileName, out2);
-                       
-                       chimera->print(out, out2);
-                       out.close();
-                       out2.close(); 
-                       
-                       if (m->control_pressed) { remove(accnosFileName.c_str()); remove(outputFileName.c_str()); delete chimera;       return 0;       }
-                       
-                       //delete accnos file if its blank 
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-                       
-                       m->mothurOutEndLine();
-                       m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-                       m->mothurOut(outputFileName); m->mothurOutEndLine();    
-                       if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
-                       m->mothurOutEndLine();
-                       
-                       delete chimera;
-                       return 0;
-               }
-               
-               //reads template
-               chimera->setTemplateFile(templatefile);
-               
-               if (m->control_pressed) { delete chimera;       return 0;       }
-               
-               if  (method != "chimeracheck") {   
-                       if (chimera->getUnaligned()) { 
-                               m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
-                               delete chimera;
-                               return 0; 
-                       }
-               }
-               
-               //some methods need to do prep work before processing the chimeras
-               chimera->doPrep(); 
-               
-               if (m->control_pressed) { delete chimera;       return 0;       }
-               
-               templateSeqsLength = chimera->getLength();
-               
-               ofstream outHeader;
-               string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras.tempHeader";
-               openOutputFile(tempHeader, outHeader);
-               
-               chimera->printHeader(outHeader);
-               outHeader.close();
-               
-               
-               //break up file
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors == 1){
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                               inFASTA.close();
-                               
-                               lines.push_back(new linePair(0, numSeqs));
-                               
-                               driver(lines[0], outputFileName, fastafile, accnosFileName);
-                               
-                               if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(tempHeader.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                               }
-                               
-                               //delete accnos file if its blank 
-                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-                                                               
-                       }else{
-                               vector<int> positions;
-                               processIDS.resize(0);
-                               
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               
-                               string input;
-                               while(!inFASTA.eof()){
-                                       input = getline(inFASTA);
-                                       if (input.length() != 0) {
-                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
-                                       }
-                               }
-                               inFASTA.close();
-                               
-                               numSeqs = positions.size();
-                               
-                               int numSeqsPerProcessor = numSeqs / processors;
-                               
-                               for (int i = 0; i < processors; i++) {
-                                       long int startPos = positions[ i * numSeqsPerProcessor ];
-                                       if(i == processors - 1){
-                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
-                                       }
-                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                               }
-                               
-                               
-                               createProcesses(outputFileName, fastafile, accnosFileName); 
-                       
-                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
-                                       
-                               //append output files
-                               for(int i=1;i<processors;i++){
-                                       appendOutputFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
-                               }
-                               
-                               vector<string> nonBlankAccnosFiles;
-                               //delete blank accnos files generated with multiple processes
-                               for(int i=0;i<processors;i++){  
-                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
-                                               nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
-                                       }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
-                               }
-                               
-                               //append accnos files
-                               if (nonBlankAccnosFiles.size() != 0) { 
-                                       rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
-                                       
-                                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-                                               appendOutputFiles(nonBlankAccnosFiles[h], accnosFileName);
-                                               remove(nonBlankAccnosFiles[h].c_str());
-                                       }
-                               }else{ hasAccnos = false;  }
-                               
-                               if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                               }
-
-                       }
-
-               #else
-                       ifstream inFASTA;
-                       openInputFile(candidateFileNames[s], inFASTA);
-                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                       inFASTA.close();
-                       lines.push_back(new linePair(0, numSeqs));
-                       
-                       driver(lines[0], outputFileName, fastafile, accnosFileName);
-                       
-                       if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(tempHeader.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                       }
-                       
-                       //delete accnos file if its blank 
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-               #endif
-               
-               //m->mothurOut("Output File Names: ");
-               //if ((filter) && (method == "bellerophon")) { m->mothurOut(
-               //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
-               //      else                             { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
-       
-               appendOutputFiles(tempHeader, outputFileName);
        
-               remove(outputFileName.c_str());
-               rename(tempHeader.c_str(), outputFileName.c_str());
+               m->mothurOut("The chimera.seqs command has been broken up into 5 separate commands.\n");
+               m->mothurOut("The chimera.bellerophon, chimera.ccode, chimera.check, chimera.pintail and chimera.slayer commands.\n");
        
-               delete chimera;
-               
-               if (method == "chimeracheck") { remove(accnosFileName.c_str());  m->mothurOutEndLine(); m->mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine();  }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outputFileName); m->mothurOutEndLine();    
-               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
-               m->mothurOutEndLine();
-
-               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-               
-               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-               
-               return 0;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSeqsCommand", "execute");
-               exit(1);
-       }
-}//**********************************************************************************************************************
-
-int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename, string accnos){
-       try {
-               ofstream out;
-               openOutputFile(outputFName, out);
-               
-               ofstream out2;
-               openOutputFile(accnos, out2);
-               
-               ifstream inFASTA;
-               openInputFile(filename, inFASTA);
-
-               inFASTA.seekg(line->start);
-               
-               for(int i=0;i<line->numSeqs;i++){
-               
-                       if (m->control_pressed) {       return 1;       }
-               
-                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
-                               
-                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
-                               
-                               if ((candidateSeq->getAligned().length() != templateSeqsLength) && (method != "chimeracheck")) {  //chimeracheck does not require seqs to be aligned
-                                       m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
-                               }else{
-                                       //find chimeras
-                                       chimera->getChimeras(candidateSeq);
-                                       
-                                       if (m->control_pressed) {       delete candidateSeq; return 1;  }
-               
-                                       //print results
-                                       chimera->print(out, out2);
-                               }
-                       }
-                       delete candidateSeq;
-                       
-                       //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
-               }
-               //report progress
-               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
-               
-               out.close();
-               out2.close();
-               inFASTA.close();
-                               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSeqsCommand", "driver");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-int ChimeraSeqsCommand::createProcesses(string outputFileName, string filename, string accnos) {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               //              processIDS.resize(0);
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
-                               process++;
-                       }else if (pid == 0){
-                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
-                               exit(0);
-                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               return 0;
-#endif         
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSeqsCommand", "createProcesses");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-void ChimeraSeqsCommand::appendOutputFiles(string temp, string filename) {
-       try{
-               
-               ofstream output;
-               ifstream input;
-               
-               openOutputFileAppend(temp, output);
-               openInputFile(filename, input, "noerror");
-               
-               while(char c = input.get()){
-                       if(input.eof())         {       break;                  }
-                       else                            {       output << c;    }
-               }
-               
-               input.close();
-               output.close();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSeqsCommand", "appendOuputFiles");
-               exit(1);
-       }
+       return 0;
 }
 //**********************************************************************************************************************
 
index 040d2dde543b9311762640680d1a8ba874e3fcf5..afbc2592a389f2faa971452d2fa12e3be4fa9680 100644 (file)
@@ -12,8 +12,6 @@
 
 #include "mothur.h"
 #include "command.hpp"
-#include "chimera.h"
-
 
 /***********************************************************/
 
@@ -27,26 +25,6 @@ public:
                
 private:
 
-       struct linePair {
-               int start;
-               int numSeqs;
-               linePair(long int i, int j) : start(i), numSeqs(j) {}
-       };
-       vector<int> processIDS;   //processid
-       vector<linePair*> lines;
-       
-       int driver(linePair*, string, string, string);
-       int createProcesses(string, string, string);
-       void appendOutputFiles(string, string); 
-
-       bool abort;
-       string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile, outputDir, search;
-       bool filter, correction, svg, printAll, realign;
-       int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
-       float divR;
-       Chimera* chimera;
-       
-       
 };
 
 /***********************************************************/
index 57ed7130916b0e1d810b470fa0552dc5556a1143..fa706eaa57b5d6c15942d274af3cc2471505d77a 100644 (file)
 #include "kmerdb.hpp"
 
 //***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) {   
-       decalc = new DeCalculator();    
+//builds a ChimeraSlayer from its run settings, reads the template sequences and runs doPrep().
+//positional arguments, in order, and the member each one is stored in:
+//  file -> fastafile, temp -> templateFileName (also read into templateSeqs via readSeqs),
+//  mode -> searchMethod, k -> kmerSize, ms -> match, mms -> misMatch, win -> window, div -> divR,
+//  minsim -> minSim, mincov -> minCov, minbs -> minBS, minsnp -> minSNP, par -> parents,
+//  it -> iters, inc -> increment, numw -> numWanted, r -> realign.
+//any std::exception is reported through m->errorOut and the program exits with status 1.
+ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div, 
+int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
+       try {
+               fastafile = file;
+               templateFileName = temp; templateSeqs = readSeqs(temp);
+               searchMethod = mode;
+               kmerSize = k;
+               match = ms;
+               misMatch = mms;
+               window = win;
+               divR = div;
+               minSim = minsim;
+               minCov = mincov;
+               minBS = minbs;
+               minSNP = minsnp;
+               parents = par;
+               iters = it;
+               increment = inc;
+               numWanted = numw;
+               realign = r; 
+       
+               decalc = new DeCalculator();    
+               
+               //doPrep() reads fastafile and templateSeqs, so it must run after the assignments above
+               doPrep();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+               exit(1);
+       }
 }
 //***************************************************************************************************************
 int ChimeraSlayer::doPrep() {
        try {
-       
+               
+               
+               //read in all query seqs
+               vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
+               
+               vector<Sequence*> temp = templateSeqs;
+               for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
+               
+               createFilter(temp, 0.0); //just removed columns where all seqs have a gap
+               
+               for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
+               
+               if (m->control_pressed) {  return 0; } 
+               
+               //run filter on template
+               for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  return 0; }  runFilter(templateSeqs[i]);  }
+               
                string  kmerDBNameLeft;
                string  kmerDBNameRight;
-               
+       
                //generate the kmerdb to pass to maligner
                if (searchMethod == "kmer") { 
-                       //leftside
+                       string rightTemplateFileName = "right." + templateFileName;
+                       databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
+                               
                        string leftTemplateFileName = "left." + templateFileName;
-                       databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);                      
+                       databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
+               #ifdef USE_MPI
+                       for (int i = 0; i < templateSeqs.size(); i++) {
+                                       
+                               if (m->control_pressed) { return 0; } 
+                                       
+                               string leftFrag = templateSeqs[i]->getUnaligned();
+                               leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
+                                       
+                               Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
+                               databaseLeft->addSequence(leftTemp);    
+                       }
+                       databaseLeft->generateDB();
+                       databaseLeft->setNumSeqs(templateSeqs.size());
+                       
+                       for (int i = 0; i < templateSeqs.size(); i++) {
+                               if (m->control_pressed) { return 0; } 
+                                       
+                               string rightFrag = templateSeqs[i]->getUnaligned();
+                               rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
+                                       
+                               Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
+                               databaseRight->addSequence(rightTemp);  
+                       }
+                       databaseRight->generateDB();
+                       databaseRight->setNumSeqs(templateSeqs.size());
+
+               #else   
+                       //leftside
                        kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
                        
@@ -52,8 +125,6 @@ int ChimeraSlayer::doPrep() {
                        databaseLeft->setNumSeqs(templateSeqs.size());
                        
                        //rightside
-                       string rightTemplateFileName = "right." + templateFileName;
-                       databaseRight = new KmerDB(rightTemplateFileName, kmerSize);                    
                        kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestRight(kmerDBNameRight.c_str());
                        
@@ -76,40 +147,8 @@ int ChimeraSlayer::doPrep() {
                        kmerFileTestRight.close();
                        
                        databaseRight->setNumSeqs(templateSeqs.size());
-
-               }
-               
-               int start = time(NULL); 
-               //filter the sequences
-               //read in all query seqs
-               ifstream in; 
-               openInputFile(fastafile, in);
-               
-               vector<Sequence*> tempQuerySeqs;
-               while(!in.eof()){
-                       if (m->control_pressed) { for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  } return 0; } 
-               
-                       Sequence* s = new Sequence(in);
-                       gobble(in);
-                       
-                       if (s->getName() != "") { tempQuerySeqs.push_back(s); }
+               #endif  
                }
-               in.close();
-               
-               vector<Sequence*> temp = templateSeqs;
-               for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
-                               
-               createFilter(temp, 0.0); //just removed columns where all seqs have a gap
-                               
-               for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
-               
-               if (m->control_pressed) {  return 0; } 
-
-               
-               //run filter on template
-               for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  return 0; }  runFilter(templateSeqs[i]);  }
-               
-               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to filter.");    m->mothurOutEndLine();
                
                return 0;
 
@@ -158,13 +197,65 @@ int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
                exit(1);
        }
 }
+#ifdef USE_MPI
+//***************************************************************************************************************
+//writes the current query's result line to the shared MPI report file (out) and, when the query
+//passes the bootstrap/divergence cutoffs, its name to the shared MPI accnos file (outAcc).
+//returns nonzero when a name was written to outAcc, 0 otherwise.
+int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
+       try {
+               MPI_Status status;
+               bool results = false;
+               string outAccString = "";
+               string outputString = "";
+               
+               if (chimeraFlags == "yes") {
+                       string chimeraFlag = "no";
+                       if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
+                          ||
+                          (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
+                       
+                       
+                       if (chimeraFlag == "yes") {     
+                               if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
+                                       cout << querySeq->getName() <<  "\tyes" << endl;
+                                       outAccString += querySeq->getName() + "\n";
+                                       results = true;
+                                       
+                                       //write to accnos file
+                                       //the string's own storage is handed to MPI: copying it with strcpy into an
+                                       //array of exactly length chars wrote the terminating '\0' one byte past the end.
+                                       //const_cast is only there because MPI-2 declares the write buffer as void*.
+                                       int length = outAccString.length();
+                                       MPI_File_write_shared(outAcc, const_cast<char*>(outAccString.c_str()), length, MPI_CHAR, &status);
+                               }
+                       }
+                       
+                       //the report line is written for every flagged query, even one that missed the cutoffs above
+                       outputString = getBlock(chimeraResults[0]);
+                       outputString += "\n";
+                       
+               }else {  outputString += querySeq->getName() + "\tno\n";  }
+               
+               //write to output file (same reasoning as above: no intermediate length-sized copy)
+               int length = outputString.length();
+               MPI_File_write_shared(out, const_cast<char*>(outputString.c_str()), length, MPI_CHAR, &status);
+
+               return results;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "print");
+               exit(1);
+       }
+}
+#endif
+
 //***************************************************************************************************************
 int ChimeraSlayer::getChimeras(Sequence* query) {
        try {
                chimeraFlags = "no";
                
                //filter query
-               spotMap = runFilter(query);
+               spotMap = runFilter(query);     
                
                querySeq = query;
                
@@ -274,7 +365,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
 //***************************************************************************************************************
 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
        try {
-       //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
+       //out << ":)\n";
                
                out << querySeq->getName() << '\t';
                out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
@@ -307,4 +398,25 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out){
        }
 }
 //***************************************************************************************************************
+string ChimeraSlayer::getBlock(data_struct data){
+       try {
+               //tab-delimited report line for one result; every field, including the last, ends with a tab
+               
+               //query name followed by the two parent names
+               string block = querySeq->getName() + "\t";
+               block += data.parentA.getName() + "\t";
+               block += data.parentB.getName()  + "\t";
+               
+               //divergence, percent identity and bootstrap for the A-left/B-right arrangement
+               block += toString(data.divr_qla_qrb) + "\t";
+               block += toString(data.qla_qrb) + "\t";
+               block += toString(data.bsa) + "\t";
+               
+               //the same three values for the B-left/A-right arrangement
+               block += toString(data.divr_qlb_qra) + "\t";
+               block += toString(data.qlb_qra) + "\t";
+               block += toString(data.bsb) + "\t";
+               
+               //window boundaries are translated through spotMap before printing
+               string leftWindow = toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]);
+               string rightWindow = toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]);
+               
+               //the flag column is always "yes" in this block
+               block += "yes\t" + leftWindow + "\t" + rightWindow + "\t";
+               
+               return block;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "getBlock");
+               exit(1);
+       }
+}
+//***************************************************************************************************************/
 
index 58e1656e6b995ae97d879863c0f2becc092ef9d1..3ce4cce4b40cbe331b260f08552fa34bad010d3d 100644 (file)
@@ -15,7 +15,7 @@
 #include "maligner.h"
 #include "slayer.h"
 
-/***********************************************************************/
+//***********************************************************************/
 //This class was modeled after the chimeraSlayer written by the Broad Institute
 /***********************************************************************/
 
@@ -23,7 +23,7 @@
 class ChimeraSlayer : public Chimera {
        
        public:
-               ChimeraSlayer(string, bool, string);    
+               ChimeraSlayer(string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
                ~ChimeraSlayer();
                
                int getChimeras(Sequence*);
@@ -31,6 +31,10 @@ class ChimeraSlayer : public Chimera {
                void printHeader(ostream&);
                int doPrep();
                
+               #ifdef USE_MPI
+               int print(MPI_File&, MPI_File&);
+               #endif
+               
        private:
                Sequence* querySeq;
                DeCalculator* decalc;
@@ -43,8 +47,11 @@ class ChimeraSlayer : public Chimera {
                vector<data_struct>  chimeraResults;
                string chimeraFlags, searchMethod, fastafile;
                bool realign;
+               int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment;
+               float divR;
        
                void printBlock(data_struct, ostream&);
+               string getBlock(data_struct);
                
 };
 
diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp
new file mode 100644 (file)
index 0000000..336dba9
--- /dev/null
@@ -0,0 +1,604 @@
+/*
+ *  chimeraslayercommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 3/31/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimeraslayercommand.h"
+#include "bellerophon.h"
+#include "pintail.h"
+#include "ccode.h"
+#include "chimeracheckrdp.h"
+#include "chimeraslayer.h"
+
+
+//***************************************************************************************************************
+
+//parses the chimera.slayer option string into the command's settings.
+//option is either "help" (prints the help text and aborts) or the parameter list given by the user.
+//abort is set to true when help was requested, a parameter name is not valid, a required file
+//(fasta, template) is missing or cannot be opened, or the search method is not recognized.
+ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid parameters for this command
+                       //"increment" is listed because it is read below; without it a user-supplied increment was rejected
+                       string Array[] =  {"fasta", "processors", "window", "template","numwanted", "ksize", "match","mismatch", 
+                       "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","increment","outputdir","inputdir", "search","realign" };
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("fasta");
+                               //user has given a fasta file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("template");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["template"] = inputDir + it->second;         }
+                               }
+                       }
+
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true;  }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true;  }
+                       
+                       //optional settings: each falls back to the default shown when the user did not supply it
+                       string temp = validParameter.validFile(parameters, "processors", false);                if (temp == "not found") { temp = "1"; }
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
+                       convert(temp, ksize);
+                       
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "50"; }
+                       convert(temp, window);
+                       
+                       temp = validParameter.validFile(parameters, "match", false);                    if (temp == "not found") { temp = "5"; }
+                       convert(temp, match);
+                       
+                       temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
+                       convert(temp, mismatch);
+                       
+                       temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.007"; }
+                       convert(temp, divR);
+                       
+                       temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
+                       convert(temp, minSimilarity);
+                       
+                       temp = validParameter.validFile(parameters, "mincov", false);                   if (temp == "not found") { temp = "70"; }
+                       convert(temp, minCoverage);
+                       
+                       temp = validParameter.validFile(parameters, "minbs", false);                    if (temp == "not found") { temp = "90"; }
+                       convert(temp, minBS);
+                       
+                       temp = validParameter.validFile(parameters, "minsnp", false);                   if (temp == "not found") { temp = "10"; }
+                       convert(temp, minSNP);
+
+                       temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
+                       convert(temp, parents); 
+                       
+                       temp = validParameter.validFile(parameters, "realign", false);                  if (temp == "not found") { temp = "f"; }
+                       realign = isTrue(temp); 
+                       
+                       search = validParameter.validFile(parameters, "search", false);                 if (search == "not found") { search = "distance"; }
+                       
+                       temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "100"; }
+                       convert(temp, iters); 
+                        
+                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "5"; }
+                       convert(temp, increment);
+                       
+                       temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "15"; }
+                       convert(temp, numwanted);
+
+                       if ((search != "distance") && (search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+//prints the usage text for the chimera.slayer command through m->mothurOut.
+//only parameters that the constructor accepts are described: the earlier text also advertised
+//"filter" and "mask", which this command neither lists as valid nor reads.
+void ChimeraSlayerCommand::help(){
+       try {
+       
+               m->mothurOut("The chimera.slayer command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
+               m->mothurOut("This command was modeled after the chimeraSlayer written by the Broad Institute.\n");
+               m->mothurOut("The chimera.slayer command parameters are fasta, template, processors, ksize, window, match, mismatch, divergence, minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign,
+               m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+               m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
+               m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
+               #ifdef USE_MPI
+               m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
+               #endif
+               m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=50. \n");
+               m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n");
+               m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n");
+               m->mothurOut("The ksize parameter allows you to input kmersize, default is 7, used if search is kmer. \n");
+               m->mothurOut("The match parameter allows you to reward matched bases in blast search, default is 5. \n");
+               m->mothurOut("The parents parameter allows you to select the number of potential parents to investigate from the numwanted best matches after rating them, default is 3. \n");
+               m->mothurOut("The mismatch parameter allows you to penalize mismatched bases in blast search, default is -4. \n");
+               m->mothurOut("The divergence parameter allows you to set a cutoff for chimera determination, default is 1.007. \n");
+               m->mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method, default=100.\n");
+               m->mothurOut("The minsim parameter allows you to specify a minimum similarity with the parent fragments, default=90. \n");
+               m->mothurOut("The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n");
+               m->mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n");
+               m->mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
+               m->mothurOut("The search parameter allows you to specify search method for finding the closest parent. Choices are distance, blast, and kmer, default distance. \n");
+               //m->mothurOut("The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default false. Found to make results worse. \n");
+               m->mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
+               m->mothurOut("The chimera.slayer command should be in the following format: \n");
+               m->mothurOut("chimera.slayer(fasta=yourFastaFile, template=yourTemplate, search=yourSearch) \n");
+               m->mothurOut("Example: chimera.slayer(fasta=AD.align, template=core_set_aligned.imputed.fasta, search=kmer) \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "help");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+//destructor: deliberately empty, it releases nothing itself
+ChimeraSlayerCommand::~ChimeraSlayerCommand(){ /*      do nothing      */      }
+
+//***************************************************************************************************************
+
+int ChimeraSlayerCommand::execute(){
+       try{
+               
+               if (abort == true) { return 0; }
+               
+               int start = time(NULL); 
+               
+               chimera = new ChimeraSlayer(fastafile, templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);   
+                                               
+               string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.chimeras";
+               string accnosFileName = outputDir + getRootName(getSimpleName(fastafile))  + "slayer.accnos";
+               bool hasAccnos = true;
+               
+               if (m->control_pressed) { delete chimera;       return 0;       }
+               
+               if (chimera->getUnaligned()) { 
+                       m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
+                       delete chimera;
+                       return 0; 
+               }
+               templateSeqsLength = chimera->getLength();
+               
+       #ifdef USE_MPI  
+               int pid, end, numSeqsPerProcessor; 
+                       int tag = 2001;
+                       vector<long> MPIPos;
+                       MPIWroteAccnos = false;
+                       
+                       MPI_Status status; 
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                       MPI_File inMPI;
+                       MPI_File outMPI;
+                       MPI_File outMPIAccnos;
+                       
+                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                       int inMode=MPI_MODE_RDONLY; 
+                                                       
+                       char outFilename[outputFileName.length()];
+                       strcpy(outFilename, outputFileName.c_str());
+                       
+                       char outAccnosFilename[accnosFileName.length()];
+                       strcpy(outAccnosFilename, accnosFileName.c_str());
+                       
+                       char inFileName[fastafile.length()];
+                       strcpy(inFileName, fastafile.c_str());
+
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+                       MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+                       
+                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+
+               
+                       if (pid == 0) { //you are the root process 
+                               m->mothurOutEndLine();
+                               m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
+                               m->mothurOutEndLine();
+       
+                               string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
+                               
+                               //print header
+                               int length = outTemp.length();
+                               char buf2[length];
+                               strcpy(buf2, outTemp.c_str()); 
+                               MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
+                               
+                               MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+                               
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                       
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
+
+                               for (int i = 1; i < processors; i++) {
+                                       bool tempResult;
+                                       MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                       if (tempResult != 0) { MPIWroteAccnos = true; }
+                               }
+                       }else{ //you are a child process
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPIPos.resize(numSeqs+1);
+                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numSeqs / processors;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               int startIndex =  pid * numSeqsPerProcessor;
+                               
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
+
+                               MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                       }
+                       
+                       //close files 
+                       MPI_File_close(&inMPI);
+                       MPI_File_close(&outMPI);
+                       MPI_File_close(&outMPIAccnos);
+                       
+                       //delete accnos file if blank
+                       if (pid == 0) {
+                               if (!MPIWroteAccnos) { 
+                                       //MPI_Info info;
+                                       //MPI_File_delete(outAccnosFilename, info);
+                                       hasAccnos = false;      
+                                       remove(accnosFileName.c_str()); 
+                               }
+                       }
+               
+       #else
+               ofstream outHeader;
+               string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + "slayer.chimeras.tempHeader";
+               openOutputFile(tempHeader, outHeader);
+               
+               chimera->printHeader(outHeader);
+               outHeader.close();
+               
+               //break up file
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
+                               
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastafile, accnosFileName);
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(tempHeader.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+                               
+                               //delete accnos file if its blank 
+                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+                                                               
+                       }else{
+                               vector<int> positions;
+                               processIDS.resize(0);
+                               
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               
+                               string input;
+                               while(!inFASTA.eof()){
+                                       input = getline(inFASTA);
+                                       if (input.length() != 0) {
+                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                       }
+                               }
+                               inFASTA.close();
+                               
+                               numSeqs = positions.size();
+                               
+                               int numSeqsPerProcessor = numSeqs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                               }
+                               
+                               
+                               createProcesses(outputFileName, fastafile, accnosFileName); 
+                       
+                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       
+                               //append output files
+                               for(int i=1;i<processors;i++){
+                                       appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                               }
+                               
+                               vector<string> nonBlankAccnosFiles;
+                               //delete blank accnos files generated with multiple processes
+                               for(int i=0;i<processors;i++){  
+                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+                                               nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+                                       }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
+                               }
+                               
+                               //append accnos files
+                               if (nonBlankAccnosFiles.size() != 0) { 
+                                       rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                                       
+                                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+                                               appendFiles(nonBlankAccnosFiles[h], accnosFileName);
+                                               remove(nonBlankAccnosFiles[h].c_str());
+                                       }
+                               }else{ hasAccnos = false;  }
+                               
+                               if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                               }
+
+                       }
+
+               #else
+                       ifstream inFASTA;
+                       openInputFile(candidateFileNames[s], inFASTA);
+                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       inFASTA.close();
+                       lines.push_back(new linePair(0, numSeqs));
+                       
+                       driver(lines[0], outputFileName, fastafile, accnosFileName);
+                       
+                       if (m->control_pressed) { 
+                                       remove(outputFileName.c_str()); 
+                                       remove(tempHeader.c_str()); 
+                                       remove(accnosFileName.c_str());
+                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                       delete chimera;
+                                       return 0;
+                       }
+                       
+                       //delete accnos file if its blank 
+                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+               #endif
+               
+               appendFiles(tempHeader, outputFileName);
+       
+               remove(outputFileName.c_str());
+               rename(tempHeader.c_str(), outputFileName.c_str());
+               
+       #endif
+               delete chimera;
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
+               m->mothurOutEndLine();
+
+               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+               
+               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ChimeraSlayerCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+       try {
+               ofstream out;
+               openOutputFile(outputFName, out);
+               
+               ofstream out2;
+               openOutputFile(accnos, out2);
+               
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
+
+               inFASTA.seekg(line->start);
+               
+               for(int i=0;i<line->numSeqs;i++){
+               
+                       if (m->control_pressed) {       return 1;       }
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               
+                               if (candidateSeq->getAligned().length() != templateSeqsLength) {  
+                                       m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
+                               }else{
+                                       //find chimeras
+                                       chimera->getChimeras(candidateSeq);
+                                       
+                                       if (m->control_pressed) {       delete candidateSeq; return 1;  }
+               
+                                       //print results
+                                       chimera->print(out, out2);
+                               }
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+               }
+               //report progress
+               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               
+               out.close();
+               out2.close();
+               inFASTA.close();
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+#ifdef USE_MPI
+int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<long>& MPIPos){
+       try {
+                               
+               MPI_Status status; 
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               for(int i=0;i<num;i++){
+                       
+                       if (m->control_pressed) {       return 1;       }
+                       
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+       
+                       char buf4[length];
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
+                       
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+
+                       Sequence* candidateSeq = new Sequence(iss);  gobble(iss);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               
+                               if (candidateSeq->getAligned().length() != templateSeqsLength) {  
+                                       m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
+                               }else{
+                                       //find chimeras
+                                       chimera->getChimeras(candidateSeq);
+                                       
+                                       if (m->control_pressed) {       delete candidateSeq; return 1;  }
+               
+                                       //print results
+                                       bool isChimeric = chimera->print(outMPI, outAccMPI);
+                                       if (isChimeric) { MPIWroteAccnos = true;  }
+                               }
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
+               }
+               //report progress
+               if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
+               
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "driverMPI");
+               exit(1);
+       }
+}
+#endif
+
+/**************************************************************************************************/
+
+int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               //              processIDS.resize(0);
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               return 0;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "createProcesses");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+
diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h
new file mode 100644 (file)
index 0000000..926326b
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef CHIMERASLAYERCOMMAND_H
+#define CHIMERASLAYERCOMMAND_H
+
+/*
+ *  chimeraslayercommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 3/31/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "chimera.h"
+
+
+/***********************************************************/
+
+class ChimeraSlayerCommand : public Command {
+public:
+       ChimeraSlayerCommand(string);
+       ~ChimeraSlayerCommand();
+       int execute();
+       void help();
+       
+               
+private:
+
+       struct linePair {
+               int start;
+               int numSeqs;
+               linePair(long int i, int j) : start(i), numSeqs(j) {}
+       };
+       vector<int> processIDS;   //processid
+       vector<linePair*> lines;
+       
+       int driver(linePair*, string, string, string);
+       int createProcesses(string, string, string);
+               
+       #ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<long>&);
+       #endif
+
+       bool abort, realign, MPIWroteAccnos;
+       string fastafile, templatefile, outputDir, search;
+       int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
+       float divR;
+       Chimera* chimera;
+       
+       
+};
+
+/***********************************************************/
+
+#endif
+
+
index 2db19735a8726e78269fbcce2a58521aa2205b19..557f17c6b85c7eb61865591ad3a8481ffa38ed38 100644 (file)
@@ -22,6 +22,66 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, f
                
                int start = time(NULL);
                int numSeqs = 0;
+               
+               m->mothurOut("Generating search database...    "); cout.flush();
+#ifdef USE_MPI 
+                       int pid;
+                       vector<long> positions;
+               
+                       MPI_Status status; 
+                       MPI_File inMPI;
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+       
+                       char inFileName[tempFile.length()];
+                       strcpy(inFileName, tempFile.c_str());
+       
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+       
+                       if (pid == 0) { //only one process needs to scan file
+                               positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
+
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
+                       }else{
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               positions.resize(numSeqs);
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                       }
+                       
+                       //create database
+                       if(method == "kmer")                    {       database = new KmerDB(tempFile, kmerSize);                      }
+                       else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
+                       else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
+                       else if(method == "distance")   {       database = new DistanceDB();    }
+                       else {
+                               m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
+                               database = new KmerDB(tempFile, 8);
+                       }
+
+                       //read file 
+                       for(int i=0;i<numSeqs;i++){
+                               //read next sequence
+                               int length = positions[i+1] - positions[i];
+                               char buf4[length];
+                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+                               
+                               string tempBuf = buf4;
+                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+                               
+                               istringstream iss (tempBuf,istringstream::in);
+                               
+                               Sequence temp(iss);  
+                               if (temp.getName() != "") {
+                                       names.push_back(temp.getName());
+                                       database->addSequence(temp);    
+                               }
+                       }
+                       
+                       database->generateDB();
+                       MPI_File_close(&inMPI);
+       #else
+               
                //need to know number of template seqs for suffixdb
                if (method == "suffix") {
                        ifstream inFASTA;
@@ -30,8 +90,6 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, f
                        inFASTA.close();
                }
 
-               m->mothurOut("Generating search database...    "); cout.flush();
-                               
                bool needToGenerate = true;
                string kmerDBName;
                if(method == "kmer")                    {       
@@ -81,7 +139,7 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, f
                        }
                        fastaFile.close();
                }
-               
+#endif         
                database->setNumSeqs(names.size());
                
                m->mothurOut("DONE."); m->mothurOutEndLine();
@@ -99,14 +157,58 @@ void Classify::readTaxonomy(string file) {
        try {
                
                phyloTree = new PhyloTree();
+               string name, taxInfo;
                
-               ifstream inTax;
-               openInputFile(file, inTax);
-       
                m->mothurOutEndLine();
                m->mothurOut("Reading in the " + file + " taxonomy...\t");      cout.flush();
+
+#ifdef USE_MPI 
+               int pid, num;
+               vector<long> positions;
                
-               string name, taxInfo;
+               MPI_Status status; 
+               MPI_File inMPI;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               char inFileName[file.length()];
+               strcpy(inFileName, file.c_str());
+               
+               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+               
+               if (pid == 0) {
+                       positions = setFilePosEachLine(file, num);
+                       
+                       //send file positions to all processes
+                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                       MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos 
+               }else{
+                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                       positions.resize(num);
+                       MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+               }
+       
+               //read file 
+               for(int i=0;i<num;i++){
+                       //read next sequence
+                       int length = positions[i+1] - positions[i];
+                       char buf4[length];
+
+                       MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+                       
+                       istringstream iss (tempBuf,istringstream::in);
+                       iss >> name >> taxInfo;
+                       taxonomy[name] = taxInfo;
+                       phyloTree->addSeqToTree(name, taxInfo);
+               }
+               
+               MPI_File_close(&inMPI);
+#else                          
+               ifstream inTax;
+               openInputFile(file, inTax);
+       
                //read template seqs and save
                while (!inTax.eof()) {
                        inTax >> name >> taxInfo;
@@ -117,10 +219,11 @@ void Classify::readTaxonomy(string file) {
                
                        gobble(inTax);
                }
-               
-               phyloTree->assignHeirarchyIDs(0);
                inTax.close();
+#endif 
        
+               phyloTree->assignHeirarchyIDs(0);
+               
                m->mothurOut("DONE.");
                m->mothurOutEndLine();  cout.flush();
        
index ba854e98b40403c8d963fb65c9c4bd6178950947..a9f0a36a2a440e17e8f4f433954c766abba041ed 100644 (file)
@@ -87,15 +87,37 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                        }
                                        
                                        int ableToOpen;
+                                       
+                                       #ifdef USE_MPI  
+                                               int pid;
+                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                                               if (pid == 0) {
+                                       #endif
+                                       
                                        ifstream in;
                                        ableToOpen = openInputFile(fastaFileNames[i], in);
+                                       in.close();
+                                       
+                                       #ifdef USE_MPI  
+                                                       for (int j = 1; j < processors; j++) {
+                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
+                                                       }
+                                               }else{
+                                                       MPI_Status status;
+                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                               }
+                                               
+                                       #endif
+                                       
                                        if (ableToOpen == 1) { 
                                                m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
                                                //erase from file list
                                                fastaFileNames.erase(fastaFileNames.begin()+i);
                                                i--;
                                        }
-                                       in.close();
+                                       
                                }
                                
                                //make sure there is at least one valid file left
@@ -125,12 +147,32 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                                //if the user has not given a path then, add inputdir. else leave path alone.
                                                if (path == "") {       namefileNames[i] = inputDir + namefileNames[i];         }
                                        }
-
                                        int ableToOpen;
+                                       
+                                       #ifdef USE_MPI  
+                                               int pid;
+                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                                               if (pid == 0) {
+                                       #endif
+
                                        ifstream in;
                                        ableToOpen = openInputFile(namefileNames[i], in);
-                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match name file with fasta file."); m->mothurOutEndLine(); abort = true;        }
                                        in.close();
+                                       
+                                       #ifdef USE_MPI  
+                                                       for (int j = 1; j < processors; j++) {
+                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
+                                                       }
+                                               }else{
+                                                       MPI_Status status;
+                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                               }
+                                               
+                                       #endif
+                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match name file with fasta file."); m->mothurOutEndLine(); abort = true;        }
+                                       
                                }
                        }
 
@@ -211,6 +253,9 @@ void ClassifySeqsCommand::help(){
                m->mothurOut("The method parameter allows you to specify classification method to use.  Your options are: bayesian and knn. The default is bayesian.\n");
                m->mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n");
                m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
+               #ifdef USE_MPI
+               m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
+               #endif
                m->mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
                m->mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
                m->mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n");
@@ -253,21 +298,6 @@ int ClassifySeqsCommand::execute(){
                                
                for (int s = 0; s < fastaFileNames.size(); s++) {
                
-                       //read namefile
-                       if(namefile != "") {
-                               nameMap.clear(); //remove old names
-                               
-                               ifstream inNames;
-                               openInputFile(namefileNames[s], inNames);
-                               
-                               string firstCol, secondCol;
-                               while(!inNames.eof()) {
-                                       inNames >> firstCol >> secondCol; gobble(inNames);
-                                       nameMap[firstCol] = getNumNames(secondCol);  //ex. seq1 seq1,seq3,seq5 -> seq1 = 3.
-                               }
-                               inNames.close();
-                       }
-               
                        m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
                        
                        if (outputDir == "") { outputDir += hasPath(fastaFileNames[s]); }
@@ -282,7 +312,102 @@ int ClassifySeqsCommand::execute(){
                        int numFastaSeqs = 0;
                        for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                        
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#ifdef USE_MPI 
+                               int pid, end, numSeqsPerProcessor; 
+                               int tag = 2001;
+                               vector<long> MPIPos;
+                               
+                               MPI_Status status; 
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                               MPI_File inMPI;
+                               MPI_File outMPINewTax;
+                               MPI_File outMPITempTax;
+                                                       
+                               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                               int inMode=MPI_MODE_RDONLY; 
+                                                               
+                               char outNewTax[newTaxonomyFile.length()];
+                               strcpy(outNewTax, newTaxonomyFile.c_str());
+                               
+                               char outTempTax[tempTaxonomyFile.length()];
+                               strcpy(outTempTax, tempTaxonomyFile.c_str());
+                               
+                               char inFileName[fastaFileNames[s].length()];
+                               strcpy(inFileName, fastaFileNames[s].c_str());
+
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax);
+                               MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax);
+                               
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
+
+                               if(namefile != "") {  MPIReadNamesFile(namefileNames[s]);  }
+                               
+                               if (pid == 0) { //you are the root process 
+                                       
+                                       MPIPos = setFilePosFasta(fastaFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
+                                       
+                                       //send file positions to all processes
+                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos   
+                                       
+                                       //figure out how many sequences you have to align
+                                       numSeqsPerProcessor = numFastaSeqs / processors;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                               
+                                       //align your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {    remove(outputNames[i].c_str()); } delete classify; return 0;  }
+                                       
+                                       for (int i = 1; i < processors; i++) {
+                                               int done;
+                                               MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                       }
+                               }else{ //you are a child process
+                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       MPIPos.resize(numFastaSeqs+1);
+                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                                       
+                                       //figure out how many sequences you have to align
+                                       numSeqsPerProcessor = numFastaSeqs / processors;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       
+                                       //align your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
+
+                                       int done = 0;
+                                       MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                               }
+                               
+                               //close files 
+                               MPI_File_close(&inMPI);
+                               MPI_File_close(&outMPINewTax);
+                               MPI_File_close(&outMPITempTax);
+                               
+#else
+                       //read namefile
+                       if(namefile != "") {
+                               nameMap.clear(); //remove old names
+                               
+                               ifstream inNames;
+                               openInputFile(namefileNames[s], inNames);
+                               
+                               string firstCol, secondCol;
+                               while(!inNames.eof()) {
+                                       inNames >> firstCol >> secondCol; gobble(inNames);
+                                       nameMap[firstCol] = getNumNames(secondCol);  //ex. seq1 seq1,seq3,seq5 -> seq1 = 3.
+                               }
+                               inNames.close();
+                       }
+
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
                                ifstream inFASTA;
                                openInputFile(fastaFileNames[s], inFASTA);
@@ -333,7 +458,7 @@ int ClassifySeqsCommand::execute(){
                                }
                                
                        }
-#else
+       #else
                        ifstream inFASTA;
                        openInputFile(fastaFileNames[s], inFASTA);
                        numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
@@ -342,7 +467,13 @@ int ClassifySeqsCommand::execute(){
                        lines.push_back(new linePair(0, numFastaSeqs));
                        
                        driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
-#endif 
+       #endif  
+#endif
+
+               #ifdef USE_MPI  
+                       if (pid == 0) {  //this part does not need to be paralellized
+               #endif
+
                        //make taxonomy tree from new taxonomy file 
                        PhyloTree taxaBrowser;
                        
@@ -416,6 +547,10 @@ int ClassifySeqsCommand::execute(){
                        remove(newTaxonomyFile.c_str());
                        rename(unclass.c_str(), newTaxonomyFile.c_str());
                        
+                       #ifdef USE_MPI  
+                               }
+                       #endif
+
                        m->mothurOutEndLine();
                        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
@@ -577,5 +712,113 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
                exit(1);
        }
 }
+//**********************************************************************************************************************
+#ifdef USE_MPI
+// Classifies this process's slice of the fasta file when running under MPI.
+//   start   - index into MPIPos of the first sequence this process handles
+//   num     - number of consecutive sequences to handle
+//   inMPI   - fasta file, already opened for reading
+//   newFile - receives "name<TAB>taxonomy" (with confidence scores when probs is set)
+//   tempFile- receives "name<TAB>simple taxonomy" for every classified sequence
+//   MPIPos  - byte offsets; record k occupies [MPIPos[k], MPIPos[k+1]) in the fasta file
+// Sequences with an empty name, or whose taxonomy comes back as "bad seq", are not written.
+// Returns 0 if the user interrupted the run (m->control_pressed), 1 otherwise.
+int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector<long>& MPIPos){
+       try {
+               MPI_Status statusNew; 
+               MPI_Status statusTemp; 
+               MPI_Status status; 
+               
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+       
+               string taxonomy;
+               string outputString;
+
+               for(int i=0;i<num;i++){
+               
+                       if (m->control_pressed) { return 0; }
+               
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+                       
+                       //one extra zeroed byte so the record is always null terminated,
+                       //the read itself does not append a terminator
+                       vector<char> buf4(length + 1, '\0');
+                       MPI_File_read_at(inMPI, MPIPos[start+i], &buf4[0], length, MPI_CHAR, &status);
+                       
+                       string tempBuf = &buf4[0];
+                       istringstream iss (tempBuf,istringstream::in);
+
+                       Sequence* candidateSeq = new Sequence(iss);
+                       
+                       if (candidateSeq->getName() != "") {
+                               taxonomy = classify->getTaxonomy(candidateSeq);
+                               
+                               if (taxonomy != "bad seq") {
+                                       //output confidence scores or not
+                                       if (probs) {
+                                               outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
+                                       }else{
+                                               outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
+                                       }
+                                       
+                                       //the write call is handed a non-const buffer, so write from a mutable copy.
+                                       //the copy holds the terminator as well, only the text itself is written.
+                                       int outLength = outputString.length();
+                                       vector<char> buf2(outputString.begin(), outputString.end());
+                                       buf2.push_back('\0');
+                               
+                                       MPI_File_write_shared(newFile, &buf2[0], outLength, MPI_CHAR, &statusNew);
+                                       
+                                       outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
+                                       outLength = outputString.length();
+                                       vector<char> buf(outputString.begin(), outputString.end());
+                                       buf.push_back('\0');
+                               
+                                       MPI_File_write_shared(tempFile, &buf[0], outLength, MPI_CHAR, &statusTemp);
+                               }
+                       }                               
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   cout << "Classifying sequence " << (i+1) << endl;       }
+               }
+               
+               //report the final count when it was not already printed by the loop
+               if(num % 100 != 0){     cout << "Classifying sequence " << (num) << endl;       }
+               
+               
+               return 1;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifySeqsCommand", "driverMPI");
+               exit(1);
+       }
+}
 
+//**********************************************************************************************************************
+// Loads a names file through MPI I/O and rebuilds nameMap from it.
+// Each record is two whitespace separated columns; nameMap[firstCol] is set to
+// getNumNames(secondCol), ex. seq1 seq1,seq3,seq5 -> seq1 = 3.
+// Any previous contents of nameMap are discarded. Returns 1.
+int ClassifySeqsCommand::MPIReadNamesFile(string nameFilename){
+       try {
+       
+               nameMap.clear(); //remove old names
+               
+               MPI_File inMPI;
+               MPI_Offset size;
+               MPI_Status status;
+               
+               //mutable copy of the file name, including room for the terminator
+               vector<char> inFileName(nameFilename.begin(), nameFilename.end());
+               inFileName.push_back('\0');
+
+               MPI_File_open(MPI_COMM_WORLD, &inFileName[0], MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
+               MPI_File_get_size(inMPI, &size);
+
+               //whole file goes into a heap buffer with one extra zeroed byte,
+               //the read does not append a terminator and the file may be too big for the stack
+               vector<char> buffer(size + 1, '\0');
+               MPI_File_read(inMPI, &buffer[0], size, MPI_CHAR, &status);
+
+               string tempBuf = &buffer[0];
+               istringstream iss (tempBuf,istringstream::in);
+               
+               string firstCol, secondCol;
+               while(!iss.eof()) {
+                       iss >> firstCol >> secondCol; gobble(iss);
+                       nameMap[firstCol] = getNumNames(secondCol);  //ex. seq1 seq1,seq3,seq5 -> seq1 = 3.
+               }
+       
+               MPI_File_close(&inMPI);
+               
+               return 1;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifySeqsCommand", "MPIReadNamesFile");
+               exit(1);
+       }
+}
+#endif
 /**************************************************************************************************/
index 890085e53261ccf204a02b44fd6634e5020f0af0..0ffd18c27c70084acd2cd2c0eb99d6d2055c44b0 100644 (file)
@@ -57,6 +57,11 @@ private:
        void appendTaxFiles(string, string);
        void createProcesses(string, string, string); 
        string addUnclassifieds(string, int);
+       
+       int MPIReadNamesFile(string);
+       #ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<long>&);
+       #endif
 };
 
 #endif
index bd5986e7d7a2d0bdfe759088c8306e206a4ee9ad..e2d307e1bfbce54162b55a7368b58180205f87e5 100644 (file)
@@ -50,11 +50,14 @@ rabund(rav), list(lv), dMatrix(dm), method(f)
        // a list contains pointers (iterators) to the all distances related
        // to a certain sequence. The Vector is accessed via the index of a 
        // sequence in the distance matrix.
+       
+
        seqVec = vector<MatVec>(lv->size());
        for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
                seqVec[currentCell->row].push_back(currentCell);
                seqVec[currentCell->column].push_back(currentCell);
        }
+
        mapWanted = false;  //set to true by mgcluster to speed up overlap merge
        
        //save so you can modify as it changes in average neighbor
@@ -86,50 +89,58 @@ void Cluster::getRowColCells() {
 /***********************************************************************/
 // Remove the specified cell from the seqVec and from the sparse
 // matrix
-void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix)
-{
-       ull drow = cell->row;
-       ull dcol = cell->column;
-       if (((vrow >=0) && (drow != smallRow)) ||
-               ((vcol >=0) && (dcol != smallCol))) {
-               ull dtemp = drow;
-               drow = dcol;
-               dcol = dtemp;
-       }
+void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix){
+       try {
+       
+               ull drow = cell->row;
+                       ull dcol = cell->column;
+                       if (((vrow >=0) && (drow != smallRow)) ||
+                               ((vcol >=0) && (dcol != smallCol))) {
+                               ull dtemp = drow;
+                               drow = dcol;
+                               dcol = dtemp;
+                       }
 
-       ull crow;
-       ull ccol;
-       int nCells;
-       if (vrow < 0) {
-               nCells = seqVec[drow].size();
-               for (vrow=0; vrow<nCells;vrow++) {
-                       crow = seqVec[drow][vrow]->row;
-                       ccol = seqVec[drow][vrow]->column;
-                       if (((crow == drow) && (ccol == dcol)) ||
-                               ((ccol == drow) && (crow == dcol))) {
-                               break;
+                       ull crow;
+                       ull ccol;
+                       int nCells;
+                       if (vrow < 0) {
+                               nCells = seqVec[drow].size();
+                               for (vrow=0; vrow<nCells;vrow++) {
+                                       crow = seqVec[drow][vrow]->row;
+                                       ccol = seqVec[drow][vrow]->column;
+                                       if (((crow == drow) && (ccol == dcol)) ||
+                                               ((ccol == drow) && (crow == dcol))) {
+                                               break;
+                                       }
+                               }
                        }
-               }
-       }
-       seqVec[drow].erase(seqVec[drow].begin()+vrow);
-       if (vcol < 0) {
-               nCells = seqVec[dcol].size();
-               for (vcol=0; vcol<nCells;vcol++) {
-                       crow = seqVec[dcol][vcol]->row;
-                       ccol = seqVec[dcol][vcol]->column;
-                       if (((crow == drow) && (ccol == dcol)) ||
-                               ((ccol == drow) && (crow == dcol))) {
-                               break;
+
+                       seqVec[drow].erase(seqVec[drow].begin()+vrow);
+                       if (vcol < 0) {
+                               nCells = seqVec[dcol].size();
+                               for (vcol=0; vcol<nCells;vcol++) {
+                                       crow = seqVec[dcol][vcol]->row;
+                                       ccol = seqVec[dcol][vcol]->column;
+                                       if (((crow == drow) && (ccol == dcol)) ||
+                                               ((ccol == drow) && (crow == dcol))) {
+                                               break;
+                                       }
+                               }
                        }
-               }
+               
+                       seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
+               
+                       if (rmMatrix) {
+                               dMatrix->rmCell(cell);
+                       }
+               
        }
-       seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
-       if (rmMatrix) {
-               dMatrix->rmCell(cell);
+       catch(exception& e) {
+               m->errorOut(e, "Cluster", "removeCell");
+               exit(1);
        }
 }
-
-
 /***********************************************************************/
 
 void Cluster::clusterBins(){
@@ -177,7 +188,7 @@ void Cluster::clusterNames(){
 void Cluster::update(double& cutOFF){
        try {
                getRowColCells();       
-       
+
                vector<int> foundCol(nColCells, 0);
 
                int search;
index 7484a621645c2449bfc9f158a391549f33da71dd..5e6a9b45aea94babdb66d7c928f48b323e88d573 100644 (file)
@@ -154,7 +154,7 @@ int ClusterCommand::execute(){
                double saveCutoff = cutoff;
                
                while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
-               
+       
                        if (m->control_pressed) { //clean up
                                delete globaldata->gSparseMatrix;  globaldata->gSparseMatrix = NULL;
                                delete globaldata->gListVector;  globaldata->gListVector = NULL;
@@ -176,6 +176,7 @@ int ClusterCommand::execute(){
                        loops++;
 
                        cluster->update(cutoff);
+       
                        float dist = matrix->getSmallDist();
                        float rndDist = roundDist(dist, precision);
 
@@ -209,7 +210,7 @@ int ClusterCommand::execute(){
                //delete globaldata's copy of the sparsematrix and listvector to free up memory
                delete globaldata->gSparseMatrix;  globaldata->gSparseMatrix = NULL;
                delete globaldata->gListVector;  globaldata->gListVector = NULL;
-               
+       
                //saves .list file so you can do the collect, rarefaction and summary commands without doing a read.list
                if (globaldata->getFormat() == "phylip") { globaldata->setPhylipFile(""); }
                else if (globaldata->getFormat() == "column") { globaldata->setColumnFile(""); }
@@ -221,7 +222,7 @@ int ClusterCommand::execute(){
                sabundFile.close();
                rabundFile.close();
                listFile.close();
-               
+       
                if (saveCutoff != cutoff) { m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
                
                m->mothurOutEndLine();
index 727f6e821f019ae5c2f7629e1d0afabd33b6e789..aaeec67f1b4eec4e2194fc014e87064c1d5c2b5c 100644 (file)
 #include "otuhierarchycommand.h"
 #include "setdircommand.h"
 #include "parselistscommand.h"
+#include "parsesffcommand.h"
+#include "chimeraccodecommand.h"
+#include "chimeracheckcommand.h"
+#include "chimeraslayercommand.h"
+#include "chimerapintailcommand.h"
+#include "chimerabellerophoncommand.h"
 
 /*******************************************************/
 
@@ -78,6 +84,12 @@ CommandFactory* CommandFactory::getInstance() {
 /***********************************************************/
 
 /***********************************************************/
+//note: This class is responsible for knowing which commands are mpiEnabled.
+//If a command is not enabled, only process 0 will execute the command. 
+//This avoids redundant outputs on pieces of code we have not parallelized. 
+//If you add mpi code to an existing command you need to modify the list below or the code will hang on MPI blocking commands like MPI_File_open. 
+//example:  commands["dist.seqs"] = "MPIEnabled";
+
 CommandFactory::CommandFactory(){
        string s = "";
        m = MothurOut::getInstance();
@@ -94,7 +106,6 @@ CommandFactory::CommandFactory(){
        commands["get.oturep"]                  = "get.oturep";
        commands["cluster"]                             = "cluster"; 
        commands["unique.seqs"]                 = "unique.seqs"; 
-       commands["dist.seqs"]                   = "MPIEnabled";
        commands["dist.shared"]                 = "dist.shared";
        commands["collect.single"]              = "collect.single"; 
        commands["collect.shared"]              = "collect.shared"; 
@@ -117,13 +128,10 @@ CommandFactory::CommandFactory(){
        commands["bootstrap.shared"]    = "bootstrap.shared";
        //commands["consensus"]                 = "consensus";
        commands["help"]                                = "help"; 
-       commands["filter.seqs"]                 = "MPIEnabled";
-       commands["align.seqs"]                  = "align.seqs";
        commands["summary.seqs"]                = "summary.seqs";
        commands["screen.seqs"]                 = "screen.seqs";
        commands["reverse.seqs"]                = "reverse.seqs";
        commands["trim.seqs"]                   = "trim.seqs";
-       commands["chimera.seqs"]                = "chimera.seqs";
        commands["list.seqs"]                   = "list.seqs";
        commands["get.seqs"]                    = "get.seqs";
        commands["remove.seqs"]                 = "get.seqs";
@@ -131,9 +139,7 @@ CommandFactory::CommandFactory(){
        commands["align.check"]                 = "align.check";
        commands["get.sharedseqs"]              = "get.sharedseqs";
        commands["get.otulist"]                 = "get.otulist";
-       commands["quit"]                                = "MPIEnabled"; 
        commands["hcluster"]                    = "hcluster"; 
-       commands["classify.seqs"]               = "classify.seqs"; 
        commands["phylotype"]                   = "phylotype";
        commands["mgcluster"]                   = "mgcluster";
        commands["pre.cluster"]                 = "pre.cluster";
@@ -142,6 +148,19 @@ CommandFactory::CommandFactory(){
        commands["set.dir"]                             = "set.dir";
        commands["merge.files"]                 = "merge.files";
        commands["parse.list"]                  = "parse.list";
+       commands["parse.sff"]                   = "parse.sff";
+       commands["classify.seqs"]               = "MPIEnabled"; 
+       commands["dist.seqs"]                   = "MPIEnabled";
+       commands["filter.seqs"]                 = "MPIEnabled";
+       commands["align.seqs"]                  = "MPIEnabled";
+       commands["chimera.seqs"]                = "chimera.seqs";
+       commands["chimera.ccode"]               = "MPIEnabled";
+       commands["chimera.check"]               = "MPIEnabled";
+       commands["chimera.slayer"]              = "MPIEnabled";
+       commands["chimera.pintail"]             = "MPIEnabled";
+       commands["chimera.bellerophon"] = "MPIEnabled";
+       commands["quit"]                                = "MPIEnabled"; 
+
 }
 /***********************************************************/
 
@@ -230,6 +249,11 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "get.otulist")                   {       command = new GetListCountCommand(optionString);                        }
                else if(commandName == "hcluster")                              {       command = new HClusterCommand(optionString);                            }
                else if(commandName == "classify.seqs")                 {       command = new ClassifySeqsCommand(optionString);                        }
+               else if(commandName == "chimera.ccode")                 {       command = new ChimeraCcodeCommand(optionString);                        }
+               else if(commandName == "chimera.check")                 {       command = new ChimeraCheckCommand(optionString);                        }
+               else if(commandName == "chimera.slayer")                {       command = new ChimeraSlayerCommand(optionString);                       }
+               else if(commandName == "chimera.pintail")               {       command = new ChimeraPintailCommand(optionString);                      }
+               else if(commandName == "chimera.bellerophon")   {       command = new ChimeraBellerophonCommand(optionString);          }
                else if(commandName == "phylotype")                             {       command = new PhylotypeCommand(optionString);                           }
                else if(commandName == "mgcluster")                             {       command = new MGClusterCommand(optionString);                           }
                else if(commandName == "pre.cluster")                   {       command = new PreClusterCommand(optionString);                          }
@@ -237,6 +261,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "otu.hierarchy")                 {       command = new OtuHierarchyCommand(optionString);                        }
                else if(commandName == "set.dir")                               {       command = new SetDirectoryCommand(optionString);                        }
                else if(commandName == "parse.list")                    {       command = new ParseListCommand(optionString);                           }
+               else if(commandName == "parse.sff")                             {       command = new ParseSFFCommand(optionString);                            }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
index 994c84505f4594a4794e0279343fd82f1214b983..efc7ba7f111d956c31d20626d825c079cf7963c8 100644 (file)
@@ -55,7 +55,11 @@ public:
        virtual void setNumSeqs(int i) {        numSeqs = i;    }
        virtual vector<int> getSequencesWithKmer(int){ vector<int> filler; return filler; };  
        virtual int getMaxKmer(){       return 1;       };
-
+       
+       #ifdef USE_MPI  
+       virtual int MPISend(int) = 0;
+       virtual int MPIRecv(int) = 0;
+       #endif
        
 protected:
        MothurOut* m;
index 4720df3cf0d968a84ba49c4af449b226a304c1cd..47e22ad47d21e9320dd5ee60bb6d40525793662d 100644 (file)
@@ -162,6 +162,8 @@ int DistanceCommand::execute(){
                
                if (abort == true) { return 0; }
                
+               int startTime = time(NULL);
+               
                int numSeqs = alignDB.getNumSeqs();
                cutoff += 0.005;
                
@@ -193,43 +195,104 @@ int DistanceCommand::execute(){
                //each process gets where it should start and stop in the file
                start = int (sqrt(float(pid)/float(processors)) * numSeqs);
                end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
-       
-               MPI_File outMPI;
-               int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
-               
-               char filename[outputFile.length()];
-               strcpy(filename, outputFile.c_str());
                
-               MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
-               
-               if (pid == 0) { //you are the root process 
-               
-                       //do your part
-                       string outputMyPart;
-                       driverMPI(start, end, outMPI, cutoff);
+               if (output != "lt") {
+                       MPI_File outMPI;
+                       int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                       
+                       char filename[outputFile.length()];
+                       strcpy(filename, outputFile.c_str());
+                       
+                       MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
+                       
+                       if (m->control_pressed) {   MPI_File_close(&outMPI);  delete distCalculator;  return 0;  }
+
+                       if (pid == 0) { //you are the root process 
+                       
+                               //do your part
+                               string outputMyPart;
+                               driverMPI(start, end, outMPI, cutoff);
+                               
+                               if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
+                       
+                               //wait on children
+                               for(int i = 1; i < processors; i++) { 
+                                       if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
+                                       
+                                       char buf[4];
+                                       MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
+                               }
+                       }else { //you are a child process
+                               //do your part
+                               driverMPI(start, end, outMPI, cutoff);
+                               
+                               if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
                        
-                       //wait on chidren
-                       for(int i = 1; i < processors; i++) { 
                                char buf[4];
-                               MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
+                               strcpy(buf, "done"); 
+                               //tell parent you are done.
+                               MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
                        }
                        
-                       if (output == "lt") {
-                               convertToLowerTriangle(outputFile);
-                       }
+                       MPI_File_close(&outMPI);
                        
-               }else { //you are a child process
-                       //do your part
-                       driverMPI(start, end, outMPI, cutoff);
-               
-                       char buf[4];
-                       strcpy(buf, "done"); 
+               }else { //lower triangle format
+                       if (pid == 0) { //you are the root process 
+                       
+                               //do your part
+                               string outputMyPart;
+                               long mySize;
+                               driverMPI(start, end, outputFile, mySize);
+       
+                               if (m->control_pressed) {  delete distCalculator;  return 0; }
+                               
+                               int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
+                               MPI_File outMPI;
+                               MPI_File inMPI;
+                       
+                               char filename[outputFile.length()];
+                               strcpy(filename, outputFile.c_str());
                        
-                       //tell parent you are done.
-                       MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
+                               MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
+
+                               //wait on children
+                               for(int b = 1; b < processors; b++) { 
+                                       long fileSize;
+                                       
+                                       if (m->control_pressed) { MPI_File_close(&outMPI);  delete distCalculator;  return 0; }
+                                       
+                                       MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
+                                       
+                                       string outTemp = outputFile + toString(b) + ".temp";
+                                       char buf[outTemp.length()];
+                                       strcpy(buf, outTemp.c_str());
+                                       
+                                       MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
+                                       
+                                       int count = 0;
+                                       while (count < fileSize) { //copy the temp file one character at a time
+                                               //append the next character of the child's temp file to the output file
+                                               char buf2[1];
+                                               MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
+                                               MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
+                                               count += 1;
+                                       }
+                                       
+                                       MPI_File_close(&inMPI); //deleted on close
+                               }
+                               
+                               MPI_File_close(&outMPI);
+                       }else { //you are a child process
+                               //do your part
+                               long size;
+                               driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size);
+                               
+                               if (m->control_pressed) { delete distCalculator;  return 0; }
+                       
+                               //tell parent you are done.
+                               MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
+                       }
                }
-               
-               MPI_File_close(&outMPI);
 #else          
                                
        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
@@ -284,7 +347,7 @@ int DistanceCommand::execute(){
                m->mothurOut("Output File Name: "); m->mothurOutEndLine();
                m->mothurOut(outputFile); m->mothurOutEndLine();
                m->mothurOutEndLine();
-
+               m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
                return 0;
                
        }
@@ -408,40 +471,101 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo
                                        if (output == "column") { outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); }
                                }
                                
-                               if ((output == "square") || (output == "lt")){ //make a square column you can convert to square phylip
+                               if (output == "square") { //make a square column you can convert to square phylip
                                        outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n');
                                        outputString += (alignDB.get(j).getName() + ' ' + alignDB.get(i).getName() + ' ' + toString(dist) + '\n');
                                }
-
                        }
                        
                        if(i % 100 == 0){
-                               m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
+                               //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
+                               cout << i << '\t' << (time(NULL) - startTime) << endl;
                        }
                        
-                       if(i % 10 == 0){ //output to file 
-                               //send results to parent
-                               int length = outputString.length();
-                               char buf[length];
-                               strcpy(buf, outputString.c_str()); 
-                               
-                               MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
-                               outputString = "";
-                       }
+                        
+                       //send results to parent
+                       int length = outputString.length();
+                       char buf[length];
+                       strcpy(buf, outputString.c_str()); 
+                       
+                       MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
+                       outputString = "";
                        
                }
                
-               m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
-               if(outputString != ""){ //output to file 
-                               //send results to parent
-                               int length = outputString.length();
-                               char buf[length];
-                               strcpy(buf, outputString.c_str()); 
+               //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
+               cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;                
+               return 1;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DistanceCommand", "driverMPI");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+/////// need to fix to work with calcs and sequencedb
+int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& size){
+       try {
+               MPI_Status status;
+               
+               MPI_File outMPI;
+               int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+               
+               char filename[file.length()];
+               strcpy(filename, file.c_str());
+               
+               MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
+
+               int startTime = time(NULL);
+               
+               string outputString = "";
+               size = 0;
+               
+               if((output == "lt") && startLine == 0){ outputString += toString(alignDB.getNumSeqs()) + "\n";  }
+               
+               for(int i=startLine;i<endLine;i++){
+                       if(output == "lt")      {       
+                               string name = alignDB.get(i).getName();
+                               if (name.length() < 10) { //pad with spaces to make compatible
+                                       while (name.length() < 10) {  name += " ";  }
+                               }
+                               outputString += name + "\t";    
+                       }
+                       for(int j=0;j<i;j++){
+                               
+                               if (m->control_pressed) {  return 0;  }
+                               
+                               distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
+                               double dist = distCalculator->getDist();
                                
-                               MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
-                               outputString = "";
+                               if (output == "lt") {  outputString += toString(dist) + "\t"; }
+                       }
+                       
+                       if (output == "lt") { outputString += "\n"; }
+
+               
+                       if(i % 100 == 0){
+                               //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
+                               cout << i << '\t' << (time(NULL) - startTime) << endl;
+                       }
+                       
+                       
+                       //write this row's results to the output file
+                       int length = outputString.length();
+                       char buf[length];
+                       strcpy(buf, outputString.c_str()); 
+                       
+                       MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
+                       size += outputString.length();
+                       outputString = "";
+                       
+                       
                }
                
+               //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
+               cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
+               MPI_File_close(&outMPI);
+               
                return 1;
        }
        catch(exception& e) {
index c1dac1443a4d2a213ab715d79908e6dd76027a62..f825cfbf343455b9ba5ef4bfb89836df8775cebe 100644 (file)
@@ -49,6 +49,7 @@ private:
        
        #ifdef USE_MPI 
        int driverMPI(int, int, MPI_File&, float);
+       int driverMPI(int, int, string, long&);
        #endif
        
        int convertMatrix(string);
index 01fea59aeec3b340041fea68e26120cfd307a873..47e5fd7b4911de428712bb9a5271fd61ce20b172 100644 (file)
@@ -25,6 +25,11 @@ public:
        void addSequence(Sequence);  
        vector<int> findClosestSequences(Sequence*, int);  // returns indexes of n closest sequences to query
        
+       #ifdef USE_MPI  
+       int MPISend(int) {return 0;}
+       int MPIRecv(int) {return 0;}
+       #endif
+       
 private:
        vector<Sequence> data;
        Dist* distCalculator;
index b402a5bac5cc8cf8ad6a2ac4b4ab882fc91e306b..3d3062b4cc371848fdbd222b7985415a41e62e57 100644 (file)
@@ -167,9 +167,12 @@ int FilterSeqsCommand::execute() {
                inFASTA.close();
                
                ////////////create filter/////////////////
+               m->mothurOut("Creating Filter... "); m->mothurOutEndLine();
                
                filter = createFilter();
                
+               m->mothurOutEndLine();  m->mothurOutEndLine();
+               
                if (m->control_pressed) { return 0; }
                
                #ifdef USE_MPI
@@ -193,8 +196,12 @@ int FilterSeqsCommand::execute() {
                
                ////////////run filter/////////////////
                
+               m->mothurOut("Running Filter... "); m->mothurOutEndLine();
+               
                filterSequences();
-                                               
+               
+               m->mothurOutEndLine();  m->mothurOutEndLine();
+                                       
                int filteredLength = 0;
                for(int i=0;i<alignmentLength;i++){
                        if(filter[i] == '1'){   filteredLength++;       }
@@ -235,14 +242,16 @@ int FilterSeqsCommand::filterSequences() {
                                
                                string filteredFasta = outputDir + getRootName(getSimpleName(fastafileNames[s])) + "filter.fasta";
 #ifdef USE_MPI 
-                               int pid, start, end; 
+                               int pid, start, end, numSeqsPerProcessor, num
                                int tag = 2001;
+                               vector<long>MPIPos;
                                                
                                MPI_Status status; 
                                MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
                                
                                MPI_File outMPI;
+                               MPI_File tempMPI;
                                MPI_File inMPI;
                                int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
                                int inMode=MPI_MODE_RDONLY; 
@@ -256,28 +265,26 @@ int FilterSeqsCommand::filterSequences() {
                                MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
                                MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
                                
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }
+
                                if (pid == 0) { //you are the root process 
                                        
-                                       setLines(fastafileNames[s]);
-                                       
-                                       char bufF[alignmentLength];
-                                       strcpy(bufF, filter.c_str()); 
-                                                               
-                                       for (int j = 0; j < lines.size(); j++) { //each process
-                                               if (j != 0) { //don't send to yourself
-                                                       MPI_Send(&lines[j]->start, 1, MPI_INT, j, tag, MPI_COMM_WORLD); //start position in file
-                                                       MPI_Send(&bufferSizes[j], 1, MPI_INT, j, tag, MPI_COMM_WORLD); //how bytes for the read
-                                                       MPI_Send(bufF, alignmentLength, MPI_CHAR, j, tag, MPI_COMM_WORLD);
-                                               }
-                                       }
+                                       MPIPos = setFilePosFasta(fastafileNames[s], num); //fills MPIPos, returns numSeqs
+                                       numSeqs += num;
                                        
-                                       //read your peice of file
-                                       char buf[bufferSizes[0]];
-                                       MPI_File_read_at(inMPI, lines[0]->start, buf, bufferSizes[0], MPI_CHAR, &status);
-                                       istringstream iss (buf,istringstream::in);
+                                       //send file positions to all processes
+                                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                                       MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos    
                                        
+                                       //figure out how many sequences you have to do
+                                       numSeqsPerProcessor = num / processors;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                               
                                        //do your part
-                                       driverMPIRun(iss, outMPI);
+                                       driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }
                                        
                                        //wait on chidren
                                        for(int i = 1; i < processors; i++) { 
@@ -286,23 +293,21 @@ int FilterSeqsCommand::filterSequences() {
                                        }
                                        
                                }else { //you are a child process
-                                       //receive your section of file
-                                       int startPos, bufferSize;
-                                       char bufF[alignmentLength];
-                                       MPI_Recv(&startPos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                       MPI_Recv(&bufferSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                       MPI_Recv(bufF, alignmentLength, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
+                                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       numSeqs += num;
+                                       MPIPos.resize(num+1);
+                                       MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
                                        
-                                       filter = bufF; //filter was made by process 0 so other processes need to get it
-                                                               
-                                       //read your peice of file
-                                       char buf2[bufferSize];
-                                       MPI_File_read_at(inMPI, startPos, buf2, bufferSize, MPI_CHAR, &status);
-                                       istringstream iss (buf2,istringstream::in);
+                                       //figure out how many sequences you have to filter
+                                       numSeqsPerProcessor = num / processors;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       
+                                       //filter your part
+                                       driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);           
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }
                                        
-                                       //do your part
-                                       driverMPIRun(iss, outMPI);
-                               
                                        char buf[4];
                                        strcpy(buf, "done"); 
                                        
@@ -361,16 +366,28 @@ int FilterSeqsCommand::filterSequences() {
                exit(1);
        }
 }
+#ifdef USE_MPI
 /**************************************************************************************/
-int FilterSeqsCommand::driverMPIRun(istringstream& in, MPI_File& outMPI) {     
+int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector<long>& MPIPos) {     
        try {
                string outputString = "";
                int count = 0;
                MPI_Status status; 
                
-               while (!in.eof()) {
+               for(int i=0;i<num;i++){
+               
+                       if (m->control_pressed) { return 0; }
+               
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+                       char buf4[length];
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
                        
-                       Sequence seq(in); gobble(in);
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+       
+                       Sequence seq(iss);  gobble(iss);
                        
                        if (seq.getName() != "") {
                                string align = seq.getAligned();
@@ -396,6 +413,8 @@ int FilterSeqsCommand::driverMPIRun(istringstream& in, MPI_File& outMPI) {
                                }
 
                        }
+                       
+                       if((i+1) % 100 == 0){   cout << (i+1) << endl;   m->mothurOutJustToLog(toString(i+1) + "\n");   }
                }
                
                if(outputString != ""){ //output to file 
@@ -407,7 +426,8 @@ int FilterSeqsCommand::driverMPIRun(istringstream& in, MPI_File& outMPI) {
                        MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
                        outputString = "";
                }
-
+               
+               if((num) % 100 != 0){   cout << (num) << endl;   m->mothurOutJustToLog(toString(num) + "\n");   }
                        
                return 0;
        }
@@ -416,6 +436,7 @@ int FilterSeqsCommand::driverMPIRun(istringstream& in, MPI_File& outMPI) {
                exit(1);
        }
 }
+#endif
 /**************************************************************************************/
 int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string inputFilename, linePair* line) {        
        try {
@@ -518,9 +539,9 @@ string FilterSeqsCommand::createFilter() {
                                for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                        
 #ifdef USE_MPI 
-                               int pid; 
-                               int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
+                               int pid, numSeqsPerProcessor, num; 
                                int tag = 2001;
+                               vector<long> MPIPos;
                                
                                MPI_Status status; 
                                MPI_File inMPI; 
@@ -532,80 +553,44 @@ string FilterSeqsCommand::createFilter() {
                
                                MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
                                
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  return 0;  }
+                               
                                if (pid == 0) { //you are the root process
-                                               setLines(fastafileNames[s]);
-                                       
-                                               for (int j = 0; j < lines.size(); j++) { //each process
-                                                       if (j != 0) { //don't send to yourself
-                                                               MPI_Send(&lines[j]->start, 1, MPI_INT, j, tag, MPI_COMM_WORLD); //start position in file
-                                                               MPI_Send(&numSeqs, 1, MPI_INT, j, tag, MPI_COMM_WORLD); 
-                                                               MPI_Send(&bufferSizes[j], 1, MPI_INT, j, tag, MPI_COMM_WORLD); //how bytes for the read
-                                                       }
-                                               }
-                       
-                                               char buf[bufferSizes[0]];
-                                               MPI_File_read_at(inMPI, 0, buf, bufferSizes[0], MPI_CHAR, &status);
-                       
-                                               string tempBuf = buf;
-                                               if (tempBuf.length() > bufferSizes[0]) { tempBuf = tempBuf.substr(0, bufferSizes[0]); }
-
-                                               MPICreateFilter(F, tempBuf);
-                                               
-                                               if (m->control_pressed) { MPI_File_close(&inMPI); return filterString; }
-                                                                                               
-                                               vector<int> temp; temp.resize(alignmentLength+1);
+                                               MPIPos = setFilePosFasta(fastafileNames[s], num); //fills MPIPos, returns numSeqs
+                                               numSeqs += num;
                                                
-                                               //get the frequencies from the child processes
-                                               for(int i = 0; i < ((processors-1)*5); i++) { 
-                                                       MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status); 
-                                                       int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
-                               
-                                                       if (receiveTag == Atag) { //you are recieveing the A frequencies
-                                                               for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
-                                                       }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
-                                                               for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
-                                                       }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
-                                                               for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
-                                                       }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
-                                                               for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
-                                                       }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
-                                                               for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
-                                                       }
-                                               } 
-
+                                               //send file positions to all processes
+                                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                                               MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos    
+                                                               
+                                               //figure out how many sequences you have to do
+                                               numSeqsPerProcessor = num / processors;
+                                               if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                               int startIndex =  pid * numSeqsPerProcessor;
+                               
+                                               //do your part
+                                               MPICreateFilter(startIndex, numSeqsPerProcessor, F, inMPI, MPIPos);
                                                
+                                               if (m->control_pressed) {  MPI_File_close(&inMPI);  return 0;  }
+                                                                                               
                                }else { //i am the child process
                        
-                                       int startPos, bufferSize;
-                                       MPI_Recv(&startPos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                       MPI_Recv(&bufferSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                                               
-                                       //send freqs
-                                       char buf2[bufferSize];
-                                       MPI_File_read_at(inMPI, startPos, buf2, bufferSize, MPI_CHAR, &status);
-                       
-                                       string tempBuf = buf2;
-                                       if (tempBuf.length() > bufferSize) { tempBuf = tempBuf.substr(0, bufferSize); }
-                       
-                                       MPICreateFilter(F, tempBuf);
-                               
-                                       if (m->control_pressed) { MPI_File_close(&inMPI); return filterString; }
+                                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       MPIPos.resize(num+1);
+                                       numSeqs += num;
+                                       MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                                       
+                                       //figure out how many sequences you have to align
+                                       numSeqsPerProcessor = num / processors;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       
+                                       //do your part
+                                       MPICreateFilter(startIndex, numSeqsPerProcessor, F, inMPI,  MPIPos);
                                        
-                                       //send my fequency counts
-                                       F.a.push_back(Atag);
-                                       int ierr = MPI_Send(&(F.a[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
-                                       F.t.push_back(Ttag);
-                                       ierr = MPI_Send (&(F.t[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
-                                       F.c.push_back(Ctag);
-                                       ierr = MPI_Send(&(F.c[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
-                                       F.g.push_back(Gtag);
-                                       ierr = MPI_Send(&(F.g[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
-                                       F.gap.push_back(Gaptag);
-                                       ierr = MPI_Send(&(F.gap[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  return 0;  }
                                }
                                
-                               MPI_Barrier(MPI_COMM_WORLD);
                                MPI_File_close(&inMPI);
                                
 #else
@@ -645,13 +630,74 @@ string FilterSeqsCommand::createFilter() {
                        }
                }
 
+
+#ifdef USE_MPI 
+               int pid;
+               int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
+               MPI_Status status;
+               
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               if (pid == 0) { //only one process should output the filter
+               
+                       vector<int> temp; temp.resize(alignmentLength+1);
+                                                       
+                       //get the frequencies from the child processes
+                       for(int i = 0; i < ((processors-1)*5); i++) { 
+                               MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status); 
+                               int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
+                               
+                               if (receiveTag == Atag) { //you are recieveing the A frequencies
+                                       for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
+                               }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
+                                       for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
+                               }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
+                                       for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
+                               }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
+                                       for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
+                               }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
+                                       for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
+                               }
+                       } 
+               }else{
+               
+                       //send my frequency counts
+                       F.a.push_back(Atag);
+                       int ierr = MPI_Send(&(F.a[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                       F.t.push_back(Ttag);
+                       ierr = MPI_Send (&(F.t[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                       F.c.push_back(Ctag);
+                       ierr = MPI_Send(&(F.c[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                       F.g.push_back(Gtag);
+                       ierr = MPI_Send(&(F.g[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                       F.gap.push_back(Gaptag);
+                       ierr = MPI_Send(&(F.gap[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
+               }
+               
+               if (pid == 0) { //only one process should output the filter
+#endif
                F.setNumSeqs(numSeqs);
                                
                if(isTrue(vertical) == 1)       {       F.doVertical(); }
                if(soft != 0)                           {       F.doSoft();             }
                        
                filterString = F.getFilter();
-
+               
+#ifdef USE_MPI
+               //send filter string to kids
+               MPI_Bcast(&filterString[0], alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD); 
+       }else{
+               //receive filterString
+               char tempBuf[alignmentLength];
+               MPI_Bcast(tempBuf, alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
+               
+               filterString = tempBuf;
+               if (filterString.length() > alignmentLength) { filterString = filterString.substr(0, alignmentLength);  }
+       }
+       
+       MPI_Barrier(MPI_COMM_WORLD);
+#endif
+               
+               
                return filterString;
        }
        catch(exception& e) {
@@ -697,31 +743,43 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair*
                exit(1);
        }
 }
+#ifdef USE_MPI
 /**************************************************************************************/
-int FilterSeqsCommand::MPICreateFilter(Filters& F, string input) {     
+int FilterSeqsCommand::MPICreateFilter(int start, int num, Filters& F, MPI_File& inMPI, vector<long>& MPIPos) {        
        try {
                
-               vector<string> seqStrings;
-               parseBuffer(input, seqStrings);
+               MPI_Status status; 
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
                
-               for(int i=0;i<seqStrings.size();i++){
+               for(int i=0;i<num;i++){
                        
-                       if (seqStrings[i].length() != alignmentLength) {  cout << i << '\t' << seqStrings[i].length() << "Sequences are not all the same length, please correct." << endl; m->control_pressed = true;  }
-
-                       if (m->control_pressed) { return 1; }
+                       if (m->control_pressed) { return 0; }
+                       
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
+       
+                       char buf4[length];
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
                        
-                       Sequence seq("", seqStrings[i]);
+                       string tempBuf = buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+       
+                       Sequence seq(iss);  
+
+                       if (seq.getAligned().length() != alignmentLength) {  cout << "Alignment length is " << alignmentLength << " and sequence " << seq.getName() << " has length " << seq.getAligned().length() << ", please correct." << endl; exit(1);  }
                        
                        if(trump != '*'){       F.doTrump(seq); }
                        if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
                        cout.flush();
                                                
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
+                       if((i+1) % 100 == 0){   cout << (i+1) << endl;   m->mothurOutJustToLog(toString(i+1) + "\n");   }
                }
                
                //report progress
-               if((seqStrings.size()) % 100 != 0){     m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine();               }
+               if((num) % 100 != 0){   cout << num << endl; m->mothurOutJustToLog(toString(num) + "\n");       }
                
                return 0;
        }
@@ -730,7 +788,7 @@ int FilterSeqsCommand::MPICreateFilter(Filters& F, string input) {
                exit(1);
        }
 }
-
+#endif
 /**************************************************************************************************/
 
 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
@@ -826,28 +884,4 @@ int FilterSeqsCommand::setLines(string filename) {
                exit(1);
        }
 }
-/**************************************************************************************************/
-int FilterSeqsCommand::parseBuffer(string file, vector<string>& seqs) {
-       try {   
-               istringstream iss (file); //,istringstream::in
-               string name, seqstring;
-
-               while (!iss.eof()) {
-                       
-                       if (m->control_pressed) { return 0; }
-                               
-                       Sequence seq(iss); gobble(iss);
-                       
-                       if (seq.getName() != "") {
-                               seqs.push_back(seq.getAligned());       
-                       }
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "FilterSeqsCommand", "parseBuffer");
-               exit(1);
-       }
-}
 /**************************************************************************************/
index 1d2526fdca83114dcca76217917324d134bcc391..3c46036468401352987fb9d6cb61b48447613dab 100644 (file)
@@ -47,12 +47,14 @@ private:
        int filterSequences();
        int createProcessesCreateFilter(Filters&, string);
        int createProcessesRunFilter(string, string);
-       int driverCreateFilter(Filters&, string, linePair*);
-       int driverRunFilter(string, string, string, linePair*); 
-       int driverMPIRun(istringstream&, MPI_File&);
-       int MPICreateFilter(Filters&, string);  
+       int driverRunFilter(string, string, string, linePair*);
+       int driverCreateFilter(Filters& F, string filename, linePair* line);
+       #ifdef USE_MPI
+       int driverMPIRun(int, int, MPI_File&, MPI_File&, vector<long>&);
+       int MPICreateFilter(int, int, Filters&, MPI_File&, vector<long>&);      
+       #endif
        int setLines(string);
-       int parseBuffer(string, vector<string>&);
+       
        
 };
 
index 72866491648e1fb02f681c1ca5842fdbba8e58a2..43c6f2e30a6dc04458e5aab7c42a8ada3636dae5 100644 (file)
@@ -44,6 +44,7 @@ FullMatrix::FullMatrix(ifstream& filehandle) {
                                
                                for(int i=0;i<numSeqs;i++){
                                        filehandle >> matrix[0][i];
+                                       if (globaldata->sim) {  matrix[0][i] = 1.0 - matrix[0][i];  }
                                }
                                break;
                        }
@@ -93,6 +94,7 @@ int FullMatrix::readSquareMatrix(ifstream& filehandle) {
                                if (m->control_pressed) { delete reading;  return 0; }
                                
                                filehandle >> matrix[i][j];
+                               if (globaldata->sim) {  matrix[i][j] = 1.0 - matrix[i][j];  }
                                
                                count++;
                                reading->update(count);
@@ -135,8 +137,10 @@ int FullMatrix::readLTMatrix(ifstream& filehandle) {
                                if (m->control_pressed) { delete reading;  return 0; }
                                
                                filehandle >> distance;
-               
+                               if (globaldata->sim) {  distance = 1.0 - distance;  }
+                               
                                matrix[i][j] = distance;  matrix[j][i] = distance;
+                               
                                count++;
                                reading->update(count);
                        }
index b4b8e0d33912acdd9790a49bc276dc8deef4d6d8..35415c70c874039c99c758c24a77b7b43eeb6079 100644 (file)
@@ -43,7 +43,7 @@ public:
        TreeMap* gTreemap;
        SequenceDB* gSequenceDB;
        string inputFileName, argv;
-       bool allLines, runParse, jumble;
+       bool allLines, runParse, jumble, sim;
        vector<string>  Estimators, Groups; //holds estimators to be used
        set<string> labels; //holds labels to be used
        vector<string> Treenames;
index 7f7f26589cf6a5c8a566393db9ec76939eb7c0a9..77d93daf4b6c8cf18a8be546fcd4eb41c0dc7301 100644 (file)
@@ -48,6 +48,8 @@ KmerDB::KmerDB(string fastaFileName, int kSize) : Database(), kmerSize(kSize) {
 
 }
 /**************************************************************************************************/
+// Default constructor: creates a database with no kmer size chosen yet.
+// kmerSize is zeroed here so it never holds an indeterminate value before it is assigned (e.g. by MPIRecv).
+KmerDB::KmerDB() : Database(), kmerSize(0) {}
+/**************************************************************************************************/
 
 KmerDB::~KmerDB(){}
 
@@ -204,6 +206,44 @@ vector<int> KmerDB::getSequencesWithKmer(int kmer) {
                exit(1);
        }       
 }
+#ifdef USE_MPI 
+/**************************************************************************************************/
+// Sends this database's kmer size (one int) to the process with rank `receiver`.
+// Nothing else is transmitted. Always returns 0.
+int KmerDB::MPISend(int receiver) {
+       try {
+               const int messageTag = 2001; //tag attached to the kmerSize message
+               const int numInts = 1;       //kmerSize is a single int
+               
+               MPI_Send(&kmerSize, numInts, MPI_INT, receiver, messageTag, MPI_COMM_WORLD);
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "KmerDB", "MPISend");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+// Receives the kmer size (one int, tag 2001) from the process with rank `sender`,
+// then resets count, sets maxKmer to 4^kmerSize and resizes kmerLocations to maxKmer+1.
+// Returns 0 on success. Exits the program if the received kmer size is not a valid
+// index into the table of powers of 4.
+int KmerDB::MPIRecv(int sender) {
+       try {
+               MPI_Status status;
+               
+               //receive kmerSize - int
+               MPI_Recv(&kmerSize, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
+               
+               //table of powers of 4, indexed by kmerSize
+               const int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+               const int numPowers = sizeof(power4s) / sizeof(power4s[0]);
+               
+               //kmerSize came from another process - validate it before using it as an array index
+               if ((kmerSize < 0) || (kmerSize >= numPowers)) {
+                       m->mothurOut("[ERROR]: received a kmer size of " + toString(kmerSize) + ", valid sizes are 0 to " + toString(numPowers-1) + ".");  m->mothurOutEndLine();
+                       exit(1);
+               }
+               
+               //set maxKmer 
+               count = 0;
+               maxKmer = power4s[kmerSize];
+               kmerLocations.resize(maxKmer+1);
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "KmerDB", "MPIRecv");
+               exit(1);
+       }
+}
+#endif
+/**************************************************************************************************/
 
 
 /**************************************************************************************************/
index bdd9ca503910e6b764c3c067b5d9305409fafd6f..513f3f07d64bbd267e11bd16777fae5aadb97a5c 100644 (file)
@@ -26,6 +26,7 @@ class KmerDB : public Database {
        
 public:
        KmerDB(string, int);
+       KmerDB();
        ~KmerDB();
        
        void generateDB();
@@ -36,6 +37,11 @@ public:
        vector<int> getSequencesWithKmer(int);  //returns vector of sequences that contain kmer passed in
        int getMaxKmer() { return maxKmer; }
        
+       #ifdef USE_MPI  
+       int MPISend(int); //just sends kmersize
+       int MPIRecv(int);
+       #endif
+       
 private:
        
        int kmerSize;
index 7904425caf46918ef8e79aa34abea972b77b2cd5..dcb026c310ef7052116bb61f5b3517e24ae1e39e 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -941,6 +941,78 @@ inline string sortFile(string distFile){
                exit(1);
        }       
 }
+/**************************************************************************************************/
+// Scans a fasta file and records the byte offset of every line that starts with '>'.
+//   filename - file to scan
+//   num      - set to the number of '>' lines found
+// Returns num+1 values: the offset of each '>' line, followed by the size of the file in bytes.
+inline vector<long> setFilePosFasta(string filename, int& num) {
+
+                       vector<long> positions;
+                       ifstream inFASTA;
+                       openInputFile(filename, inFASTA);
+                               
+                       string input;
+                       while(!inFASTA.eof()){
+                               //remember where the line starts BEFORE reading it. Working backwards from the position
+                               //after the read is wrong when the line ends in \r\n, is followed by blank lines,
+                               //or is the last line of the file (tellg() then reports -1).
+                               long lineStart = inFASTA.tellg();
+                               input = getline(inFASTA); gobble(inFASTA);
+                               if (input.length() != 0) {
+                                       if(input[0] == '>'){    positions.push_back(lineStart);     }
+                               }
+                       }
+                       inFASTA.close();
+               
+                       num = positions.size();
+               
+                       //get num bytes in file
+                       //start from a defined value so a failed fopen cannot push an uninitialized size
+                       long size = 0;
+                       if (!positions.empty()) {  size = positions.back();  }
+               
+                       FILE * pFile = fopen (filename.c_str(),"rb");
+                       if (pFile==NULL) perror ("Error opening file");
+                       else{
+                               fseek (pFile, 0, SEEK_END);
+                               size=ftell (pFile);
+                               fclose (pFile);
+                       }
+               
+                       positions.push_back(size);
+               
+                       return positions;
+}
+/**************************************************************************************************/
+// Scans a file and records the byte offset at which every non-empty line starts.
+//   filename - file to scan
+//   num      - set to the number of non-empty lines found
+// Returns num+1 values: the offset of each non-empty line, followed by the size of the file in bytes.
+inline vector<long> setFilePosEachLine(string filename, int& num) {
+
+                       vector<long> positions;
+                       ifstream in;
+                       openInputFile(filename, in);
+                               
+                       string input;
+                       while(!in.eof()){
+                               //position of the line about to be read. This is used directly rather than being
+                               //recomputed from the position after the read, which is off when the line ends
+                               //in \r\n or is followed by blank lines.
+                               long lastpos = in.tellg();
+                               input = getline(in); gobble(in);
+                               if (input.length() != 0) {  positions.push_back(lastpos);  }
+                       }
+                       in.close();
+               
+                       num = positions.size();
+               
+                       //get num bytes in file
+                       //start from a defined value so a failed fopen cannot push an uninitialized size
+                       long size = 0;
+                       if (!positions.empty()) {  size = positions.back();  }
+               
+                       FILE * pFile = fopen (filename.c_str(),"rb");
+                       if (pFile==NULL) perror ("Error opening file");
+                       else{
+                               fseek (pFile, 0, SEEK_END);
+                               size=ftell (pFile);
+                               fclose (pFile);
+                       }
+               
+                       positions.push_back(size);
+               
+                       return positions;
+}
+
 /**************************************************************************************************/
 #endif
 
index aa7d3934ae11c3c4490ed3beaaceaf320ddfc6be..132886bd55be4e9a57aa52816cfc49bcfb98eb42 100644 (file)
 #include "alignment.hpp"
 #include "nastreport.hpp"
 
+
+/******************************************************************************************************************/
+
+// Default constructor: starts with an empty output string.
+NastReport::NastReport() : output("") {}
+/******************************************************************************************************************/
+// Builds the tab-separated line of column titles for the report, ending in a newline.
+// The line is stored in `output` and also returned.
+string NastReport::getHeaders() {
+       //adjacent string literals are joined by the compiler into one header line
+       output = "QueryName\tQueryLength\tTemplateName\tTemplateLength\t"
+                "SearchMethod\tSearchScore\t"
+                "AlignmentMethod\tQueryStart\tQueryEnd\tTemplateStart\tTemplateEnd\t"
+                "PairwiseAlignmentLength\tGapsInQuery\tGapsInTemplate\t"
+                "LongestInsert\t"
+                "SimBtwnQuery&Template\n";
+       
+       return output;
+}
 /******************************************************************************************************************/
 
 NastReport::NastReport(string candidateReportFName) {
@@ -47,6 +66,38 @@ void NastReport::print(){
        candidateReportFile << endl;
        candidateReportFile.flush();
 }
+/******************************************************************************************************************/
+
+// Cuts the text form of a number down to two digits after the decimal point (no rounding).
+// Text with no decimal point gets ".00" appended; text with fewer than two digits after
+// the point is padded with zeros, so the result always shows exactly two places.
+static string nastTwoDecimals(string number) {
+       string::size_type dot = number.find_last_of('.');  //find decimal point if there is one
+       
+       if (dot == string::npos) {  return number + ".00";  }
+       
+       number = number.substr(0, dot+3);  //keep at most 2 digits after the point
+       while ((number.length() - dot) < 3) {  number += '0';  }  //"0.5" becomes "0.50"
+       
+       return number;
+}
+
+/******************************************************************************************************************/
+
+// Builds one tab-separated report line, ending in a newline, from the values currently
+// held by this object. The line is stored in `output` and also returned.
+// searchScore and similarityToTemplate are written with two decimal places.
+string NastReport::getReport(){
+       
+       output = "";
+       
+       output += queryName + '\t' + toString(queryLength) + '\t' + templateName + '\t' + toString(templateLength) + '\t';
+       
+       output += searchMethod + '\t' + nastTwoDecimals(toString(searchScore)) + '\t';
+       output += alignmentMethod + '\t' + toString(candidateStartPosition) + "\t" + toString(candidateEndPosition) + '\t';
+       output += toString(templateStartPosition) + "\t" + toString(templateEndPosition) + '\t';
+       output += toString(pairwiseAlignmentLength) + '\t' + toString(totalGapsInQuery) + '\t' + toString(totalGapsInTemplate) + '\t';
+       output += toString(longestInsert) + '\t';
+       
+       output += nastTwoDecimals(toString(similarityToTemplate)) + '\n';
+       
+       return output;
+}
 
 /******************************************************************************************************************/
 
index 2be289c2345df9929949ddc725936adb7ee0c1ab..80c99490b70e04e945b895fb19795a22fe5f241f 100644 (file)
@@ -19,6 +19,7 @@ class NastReport {
 
 public:
        NastReport(string);
+       NastReport();
        ~NastReport();
        void setCandidate(Sequence*);
        void setTemplate(Sequence*);
@@ -26,9 +27,12 @@ public:
        void setAlignmentParameters(string, Alignment*);
        void setNastParameters(Nast);
        void print();
+       string getReport();
+       string getHeaders();
        
 private:
        string queryName;
+       string output;
        int queryLength;
        string templateName;
        int templateLength;
diff --git a/parsesffcommand.cpp b/parsesffcommand.cpp
new file mode 100644 (file)
index 0000000..ed26be0
--- /dev/null
@@ -0,0 +1,562 @@
+/*
+ *  parsesffcommand.cpp
+ *  Mothur
+ *
+ *  Created by Pat Schloss on 2/6/10.
+ *  Copyright 2010 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "parsesffcommand.h"
+#include "sequence.hpp"
+
+//**********************************************************************************************************************
+
+//Parses the option string handed to parse.sff, validates every parameter name and resolves the
+//sff / oligos file names (prefixing inputdir when the user gave no path). On any invalid or
+//unreadable input the abort flag is set so that execute() becomes a no-op.
+ParseSFFCommand::ParseSFFCommand(string option){
+       try {
+               abort = false;
+               
+               //execute() and screenSeq() read these counters even when getOligos() never runs
+               //(no oligos file given), so give them defined values before anything else happens
+               minLength = 0;
+               numFPrimers = 0;
+               numRPrimers = 0;
+               numBarcodes = 0;
+               
+               if(option == "help") {
+                       help();
+                       abort = true; 
+               }
+               else {
+                       //valid parameters for this command
+                       string Array[] =  {"sff", "oligos", "minlength", "outputdir", "inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("sff");
+                               //user has given an sff file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["sff"] = inputDir + it->second;              }
+                               }
+                               
+                               it = parameters.find("oligos");
+                               //user has given an oligos file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                               }
+                       }
+                       
+                       
+                       //check for required parameters
+                       sffFile = validParameter.validFile(parameters, "sff", true);
+                       if (sffFile == "not found"){
+                               m->mothurOut("sff is a required parameter for the parse.sff command.");
+                               m->mothurOutEndLine();
+                               abort = true;
+                       }
+                       else if (sffFile == "not open")         {       abort = true;   }       
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);
+                       if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(sffFile); //if user entered a file with a path then preserve it    
+                       }
+
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...                       
+                       oligoFile = validParameter.validFile(parameters, "oligos", true);
+                       if (oligoFile == "not found")   {       oligoFile = "";         }
+                       else if(oligoFile == "not open"){       abort = true;           } 
+                       
+                       //minlength defaults to 0, which execute() treats as "no length screen"
+                       string temp = validParameter.validFile(parameters, "minlength", false);
+                       if (temp == "not found") { temp = "0"; }
+                       convert(temp, minLength); 
+               }               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseSFFCommand", "ParseSFFCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+//intentionally empty destructor
+ParseSFFCommand::~ParseSFFCommand(){
+}
+
+//**********************************************************************************************************************
+
+//closes every flow file stream and frees it; the vector is left empty
+static void releaseFlowFiles(vector<ofstream*>& flowFiles){
+       for(int i=0;i<flowFiles.size();i++){
+               flowFiles[i]->close();
+               delete flowFiles[i];
+       }
+       flowFiles.clear();
+}
+
+//**********************************************************************************************************************
+
+//Reads sffFile as text (a common header followed by one record per read), trims each flowgram with
+//screenFlow(), converts it to bases with flow2seq() and writes the reads that pass screenSeq() to the
+//flow file of their group. Returns 0 both on completion and when the run is aborted or interrupted.
+int ParseSFFCommand::execute(){
+       try {
+               if (abort == true) {    return 0;       }
+
+               ifstream inSFF;
+               openInputFile(sffFile, inSFF);
+               
+               cout.setf(ios::fixed, ios::floatfield);
+               cout.setf(ios::showpoint);
+               cout << setprecision(2);
+                       
+               //one output stream per barcode group, or a single <root>flow file when no oligos file is given
+               vector<ofstream*> flowFileNames;
+               if(oligoFile != ""){
+                       getOligos(flowFileNames);
+               }
+               else{
+                       flowFileNames.push_back(new ofstream((outputDir + getRootName(getSimpleName(sffFile)) + "flow").c_str(), ios::ate));
+                       outputNames.push_back((outputDir + getRootName(getSimpleName(sffFile)) + "flow"));
+               }
+               
+               for(int i=0;i<flowFileNames.size();i++){
+                       flowFileNames[i]->setf(ios::fixed, ios::floatfield);
+                       flowFileNames[i]->setf(ios::showpoint);
+                       *flowFileNames[i] << setprecision(2);
+               }                       
+               
+               if (m->control_pressed) { releaseFlowFiles(flowFileNames); return 0; }
+               
+//             ofstream fastaFile;
+//             openOutputFile(getRootName(sffFile) + "fasta", fastaFile);
+
+//             ofstream qualFile;
+//             openOutputFile(getRootName(sffFile) + "qual", qualFile);
+               
+               //common header: only the read count and the flow count are used, the other lines are consumed and dropped
+               string commonHeader = getline(inSFF);
+               string magicNumber = getline(inSFF);            
+               string version = getline(inSFF);
+               string indexOffset = getline(inSFF);
+               string indexLength = getline(inSFF);
+               int numReads = parseHeaderLineToInt(inSFF);
+               string headerLength = getline(inSFF);
+               string keyLength = getline(inSFF);
+               int numFlows = parseHeaderLineToInt(inSFF);
+               string flowgramCode = getline(inSFF);
+               string flowChars = getline(inSFF);
+               string keySequence = getline(inSFF);
+               gobble(inSFF);
+
+               string seqName;
+               
+               for(int i=0;i<numReads;i++){
+                       
+                       if (m->control_pressed) { releaseFlowFiles(flowFileNames); return 0; }
+                       
+                       inSFF >> seqName;
+                       seqName = seqName.substr(1);    //drop the leading marker character of the name line
+                       gobble(inSFF);
+                       
+                       string runPrefix = parseHeaderLineToString(inSFF);
+                       string regionNumber = parseHeaderLineToString(inSFF);
+                       string xyLocation = parseHeaderLineToString(inSFF);
+                       gobble(inSFF);
+                       
+                       string runName = parseHeaderLineToString(inSFF);
+                       string analysisName = parseHeaderLineToString(inSFF);
+                       string fullPath = parseHeaderLineToString(inSFF);
+                       gobble(inSFF);
+                       
+                       string readHeaderLen = parseHeaderLineToString(inSFF);
+                       string nameLength = parseHeaderLineToString(inSFF);
+                       int numBases = parseHeaderLineToInt(inSFF);
+                       string clipQualLeft = parseHeaderLineToString(inSFF);
+                       int clipQualRight = parseHeaderLineToInt(inSFF);
+                       string clipAdapLeft = parseHeaderLineToString(inSFF);
+                       string clipAdapRight = parseHeaderLineToString(inSFF);
+                       gobble(inSFF);
+                       
+                       vector<float> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);
+                       vector<int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases);
+                       string bases = parseHeaderLineToString(inSFF);
+                       string qualityScores = parseHeaderLineToString(inSFF);
+                       gobble(inSFF);
+                       
+                       //clipQualRight is used as a 1-based position into flowIndices. A value outside 1..numBases
+                       //(NOTE(review): presumably 0 means "no right clip" - confirm against the SFF description)
+                       //falls back to the last base instead of indexing out of bounds.
+                       int lastBase = clipQualRight;
+                       if(lastBase < 1 || lastBase > (int)flowIndices.size()){ lastBase = flowIndices.size();  }
+                       
+                       int flowLength = 0;
+                       if(lastBase > 0){       flowLength = flowIndices[lastBase-1];   }
+                       if(flowLength > (int)flowVector.size()){        flowLength = flowVector.size(); }
+                                               
+                       screenFlow(flowVector, flowLength);
+                       string sequence = flow2seq(flowVector, flowLength);
+                       
+                       int group = 0;
+                       bool good = true;       //a read is kept unless one of the requested screens rejects it
+       
+                       if(minLength != 0 || numFPrimers != 0  || numBarcodes != 0 || numRPrimers != 0){                
+                               good = screenSeq(sequence, group);
+                               //without barcodes there is nothing to assign a group, so the read goes to the first file
+                               if(numBarcodes == 0){   group = 0;      }
+                       }
+
+                       //only write when the group really has an output stream
+                       if(good && group >= 0 && group < (int)flowFileNames.size()){
+                               *flowFileNames[group] << seqName << ' ' << flowLength;
+                               for(int j=0;j<numFlows;j++){
+                                       *flowFileNames[group] << ' ' << flowVector[j];
+                               }
+                               *flowFileNames[group] << endl;                          
+                       }
+                       
+//                     string fastaHeader = '>' + seqName + "\tregion=" + regionNumber + " xy=" + xyLocation;
+//                     fastaFile << fastaHeader << endl;
+//                     fastaFile << stripSeqQual(bases, clipQualLeft, clipQualRight) << endl;
+//
+//                     qualFile << fastaHeader << endl;
+//                     qualFile << stripQualQual(qualityScores, clipQualLeft, clipQualRight) << endl;
+
+               }
+               
+               releaseFlowFiles(flowFileNames);
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+//             fastaFile.close();
+//             qualFile.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseSFFCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+//prints the (still placeholder) usage text for parse.sff
+void ParseSFFCommand::help(){
+       try {
+               m->mothurOut("The parse.sff command..."); m->mothurOutEndLine();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseSFFCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+//Reads the oligos file: each non-comment line is "<type> <oligo> [group]" where type is forward, reverse
+//or barcode. Oligos are upper-cased with U replaced by T; reverse primers are stored reverse-complemented.
+//Every barcode line opens one <root><group>.flow output stream, appended to outSFFFlowVec in file order,
+//and maps the barcode to that stream's index. Lines with any other type are ignored.
+void ParseSFFCommand::getOligos(vector<ofstream*>& outSFFFlowVec){
+       try {
+
+               ifstream inOligos;
+               openInputFile(oligoFile, inOligos);
+               
+               string type, oligo, group;
+               
+               int index = 0;
+               
+               while(!inOligos.eof()){
+                       inOligos >> type;
+
+                       if(type[0] == '#'){     getline(inOligos);      } // get rest of line if there's any crap there
+                       else{
+                               inOligos >> oligo;
+                               
+                               for(int i=0;i<oligo.length();i++){
+                                       oligo[i] = toupper(oligo[i]);
+                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
+                               }
+                               if(type == "forward"){
+                                       forPrimer.push_back(oligo);
+                               }
+                               else if(type == "reverse"){
+                                       Sequence oligoRC("reverse", oligo);
+                                       oligoRC.reverseComplement();
+                                       revPrimer.push_back(oligoRC.getUnaligned());
+                               }
+                               else if(type == "barcode"){
+                                       inOligos >> group;
+                                       barcodes[oligo]=index++;
+                                       groupVector.push_back(group);
+                                       
+                                       //build the name once so the reported output name is exactly the file that gets opened
+                                       string flowFileName = outputDir + getRootName(getSimpleName(sffFile)) + group + ".flow";
+                                       outSFFFlowVec.push_back(new ofstream(flowFileName.c_str(), ios::ate));
+                                       outputNames.push_back(flowFileName);
+                               }
+                       }
+                       gobble(inOligos);
+               }
+               
+               inOligos.close();
+               
+               numFPrimers = forPrimer.size();
+               numRPrimers = revPrimer.size();
+               numBarcodes = barcodes.size();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseSFFCommand", "getOligos");
+               exit(1);
+       }
+       
+}
+
+//**********************************************************************************************************************
+
+//Skips forward to the next ':' in file and reads the integer that follows it.
+//Returns 0 when the end of the file is reached before a ':' is found.
+int ParseSFFCommand::parseHeaderLineToInt(ifstream& file){
+       
+       int number = 0;         //defined result even if no ':' (or no integer) is found
+
+       while (!file.eof())     {
+
+               char c = file.get(); 
+               if (c == ':'){
+                       file >> number;
+                       break;
+               }
+               
+       }
+       gobble(file);
+       return number;
+}
+
+//**********************************************************************************************************************
+
+//Skips forward to the next ':' in file and returns the rest of that line (leading whitespace removed
+//by gobble). Returns an empty string when no ':' is found before the end of the file.
+string ParseSFFCommand::parseHeaderLineToString(ifstream& file){
+       
+       string value;
+       
+       while (!file.eof())     {
+               const char next = file.get(); 
+               if (next != ':'){       continue;       }
+               
+               gobble(file);
+               value = getline(file);                  
+               break;
+       }
+       
+       gobble(file);
+       return value;
+}
+
+//**********************************************************************************************************************
+
+//Skips forward to the next ':' in file and reads `length` floats that follow it.
+//The returned vector always has `length` elements; they stay 0 when no ':' is found.
+vector<float> ParseSFFCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
+       
+       vector<float> values(length);
+       
+       while (!file.eof())     {
+               const char next = file.get(); 
+               if (next != ':'){       continue;       }
+               
+               for(int slot=0;slot<length;slot++){     file >> values[slot];   }
+               break;
+       }
+       
+       gobble(file);   
+       return values;
+}
+
+//**********************************************************************************************************************
+
+//Skips forward to the next ':' in file and reads `length` integers that follow it.
+//The returned vector always has `length` elements; they stay 0 when no ':' is found.
+vector<int> ParseSFFCommand::parseHeaderLineToIntVector(ifstream& file, int length){
+       
+       vector<int> values(length);
+       
+       while (!file.eof())     {
+               const char next = file.get(); 
+               if (next != ':'){       continue;       }
+               
+               for(int slot=0;slot<length;slot++){     file >> values[slot];   }
+               break;
+       }
+       
+       gobble(file);   
+       return values;
+}
+
+//**********************************************************************************************************************
+
+
+//Shortens `length` to the usable part of the flowgram. The flows are examined four at a time; the scan
+//stops at the first group of four that has no flow above 0.50 (no signal) or has a flow in (0.50, 0.69]
+//(ambiguous signal). On return length is four times the number of groups accepted before that point.
+void ParseSFFCommand::screenFlow(vector<float> flowgram, int& length){
+       try{
+
+               int newLength = 0;
+               int numFlowValues = flowgram.size();
+
+               //the second condition keeps a whole group of four inside flowgram, so a length that is
+               //not a multiple of four (or larger than the flowgram) can never index past its end
+               while(newLength * 4 < length && (newLength + 1) * 4 <= numFlowValues){
+                       
+                       int signal = 0;
+                       int noise = 0;
+                       for(int i=0;i<4;i++){
+                               float flow = flowgram[i + 4 * newLength];
+
+                               if(flow > 0.50){
+                                       signal++;
+                                       if(flow <= 0.69){ // not sure why, but if i make it <0.70 it doesn't work...
+                                               noise++;
+                                       }
+                               }
+                       }
+                       if(noise > 0 || signal == 0){
+                               break;
+                       }                       
+                       newLength++;
+               }
+               length = newLength * 4;
+       }
+       
+       catch(exception& e) {
+               m->errorOut(e, "ParseSFFCommand", "screenFlow");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+//Translates flows 8 .. length-1 of the flowgram into bases. Flow i belongs to base "TACG"[i % 4] and
+//contributes that base as many times as its intensity rounded to the nearest whole number.
+//The first eight flows are skipped (presumably the key sequence - verify against the sff header).
+string ParseSFFCommand::flow2seq(vector<float> flowgram, int length){
+
+       const string flowOrder = "TACG";
+       string bases;
+       
+       int position = 8;
+       while(position < length){
+               const int copies = int(flowgram[position] + 0.5);
+               if(copies > 0){ bases.append(copies, flowOrder[ position % 4 ]);        }
+               position++;
+       }
+       
+       return bases;
+}
+
+//**********************************************************************************************************************
+
+//Decides whether a read passes every configured screen: minimum length, a barcode at the start of the
+//read, a forward primer right after the barcode and a reverse primer at the end of the read. A screen
+//with no oligos configured always passes. `group` receives the index stored for the matching barcode,
+//0 when no barcodes are configured, or -1 when barcodes exist but none matches (the read then fails).
+bool ParseSFFCommand::screenSeq(string& sequence, int& group){
+
+       int seqLength = sequence.length();
+       
+       int length = 1;
+       group = barcodes.empty() ? 0 : -1;
+       
+       //compare as signed values so a negative minLength cannot turn into a huge unsigned limit
+       if(seqLength < minLength){      length = 0;     }
+       
+       int barcode = 1;
+       int barcodeLength = 0;
+
+       for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
+               int oligoLength = (it->first).length();
+               
+               //a read shorter than the barcode cannot carry it
+               if(oligoLength <= seqLength && compareDNASeq(it->first, sequence.substr(0,oligoLength))){
+                       barcode = 1;
+                       barcodeLength = oligoLength;
+                       group = it->second;
+                       break;
+               }
+               else{
+                       barcode = 0;
+               }
+       }
+       
+       
+       int fPrimer = 1;
+       for(int i=0;i<numFPrimers;i++){
+               int oligoLength = forPrimer[i].length();
+               
+               //the primer has to fit between the barcode and the end of the read, otherwise substr would throw
+               if(barcodeLength + oligoLength <= seqLength && compareDNASeq(forPrimer[i], sequence.substr(barcodeLength,oligoLength))){
+                       fPrimer = 1;
+                       break;
+               }
+               else{
+                       fPrimer = 0;
+               }
+       }
+       
+       int rPrimer = 1;
+       for(int i=0;i<numRPrimers;i++){
+               int oligoLength = revPrimer[i].length();
+               
+               //guard the subtraction: on a read shorter than the primer it would wrap around and substr would throw
+               if(oligoLength <= seqLength && compareDNASeq(revPrimer[i], sequence.substr(seqLength-oligoLength,oligoLength))){
+                       rPrimer = 1;
+                       break;
+               }
+               else{
+                       rPrimer = 0;
+               }
+       }
+
+       return fPrimer * rPrimer * length * barcode;
+               
+}
+
+//**********************************************************************************************************************
+          
+//Compares an oligo (which may contain IUPAC ambiguity codes) with the first oligo.length() characters
+//of seq. Identical characters always match; otherwise A/C/G/T in the oligo is a mismatch, an ambiguity
+//code matches only the bases it stands for, and an N or I in the oligo rejects only a literal N in seq.
+//Returns false when seq is shorter than the oligo.
+bool ParseSFFCommand::compareDNASeq(string oligo, string seq){
+   try {
+          bool success = 1;
+          int length = oligo.length();
+          
+          //seq[i] below must stay inside seq; a shorter seq cannot contain the oligo
+          if(seq.length() < oligo.length())    {       return 0;       }
+          
+          for(int i=0;i<length;i++){
+                  
+                  if(oligo[i] != seq[i]){
+                          if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')         {       success = 0;    }
+                          else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                     {       success = 0;    }
+                          else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                         {       success = 0;    }
+                          else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                         {       success = 0;    }
+                          else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                         {       success = 0;    }
+                          else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                         {       success = 0;    }
+                          else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                         {       success = 0;    }
+                          else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                         {       success = 0;    }
+                          else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))        {       success = 0;    }
+                          else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))        {       success = 0;    }
+                          else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))        {       success = 0;    }
+                          else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))        {       success = 0;    }                       
+                          
+                          if(success == 0)     {       break;  }
+                  }
+                  else{
+                          success = 1;
+                  }
+          }
+          
+          return success;
+   }
+   catch(exception& e) {
+          //report under this class; the original label named TrimSeqsCommand
+          m->errorOut(e, "ParseSFFCommand", "compareDNASeq");
+          exit(1);
+   }
+}
+          
+//**********************************************************************************************************************
+
+//string ParseSFFCommand::stripSeqQual(string qScores, int start, int end){
+//     
+//     
+//     return qScores.substr(start-1, end-start+1);
+//
+//}
+
+//**********************************************************************************************************************
+
+//string ParseSFFCommand::stripQualQual(string qScores, int start, int end){
+//     
+//     start--;
+//     
+//     int startCount = 0;
+//     int startIndex = 0;
+//     
+//     while(startCount < start && startIndex < qScores.length()){
+//             if(isspace(qScores[startIndex])){
+//                     startCount++;
+//             }
+//        startIndex++;
+//     }
+//     
+//     int endCount = startCount;
+//     int endIndex = startIndex;
+//     
+//     while(endCount < end && endIndex < qScores.length()){
+//             if(isspace(qScores[endIndex])){
+//                     endCount++;
+//             }
+//             endIndex++;
+//     }
+//     
+//   return qScores.substr(startIndex, endIndex-startIndex-1);//, endCount-startCount);
+//     
+//}
+
+//**********************************************************************************************************************
+
+
diff --git a/parsesffcommand.h b/parsesffcommand.h
new file mode 100644 (file)
index 0000000..409293c
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef PARSESFFCOMMAND_H
+#define PARSESFFCOMMAND_H
+
+/*
+ *  parsesffcommand.h
+ *  Mothur
+ *
+ *  Created by Pat Schloss on 2/6/10.
+ *  Copyright 2010 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+
+//Command object behind parse.sff: reads an sff file as text, screens every read by flow quality,
+//minimum length, barcode and primers, and writes the reads that pass to .flow files.
+class ParseSFFCommand : public Command {
+public:
+       //parses and validates the option string; sets abort on "help" or on invalid input
+       ParseSFFCommand(string);
+       ~ParseSFFCommand();
+       //runs the command; returns 0 (also when aborted)
+       int execute();
+       void help();    
+       
+private:
+
+       //readers for "label: value" lines - each one skips forward to the next ':' and reads what follows
+       int parseHeaderLineToInt(ifstream&);
+       vector<float> parseHeaderLineToFloatVector(ifstream&, int);
+       vector<int> parseHeaderLineToIntVector(ifstream&, int);
+       string parseHeaderLineToString(ifstream&);
+       //trims the flow length (second argument, in/out) to the leading groups of four clean flows
+       void screenFlow(vector<float>, int&);
+       //converts flows 8..length-1 into a base string using the flow order TACG
+       string flow2seq(vector<float>, int);
+       //applies the length / barcode / primer screens; the int receives the barcode's group index
+       bool screenSeq(string&, int&);
+       //oligo-versus-read comparison that understands IUPAC ambiguity codes in the oligo
+       bool compareDNASeq(string, string);
+       //reads the oligos file and opens one output stream per barcode line
+       void getOligos(vector<ofstream*>&);
+       
+       
+       string sffFile;
+       string oligoFile;
+
+       //minimum read length; 0 when the user does not supply minlength
+       int minLength;
+       //sizes of forPrimer, revPrimer and barcodes; assigned at the end of getOligos()
+       int numFPrimers, numRPrimers, numBarcodes;
+       vector<string> forPrimer, revPrimer;
+       //barcode sequence -> index of its output stream, in the order the barcodes appear in the oligos file
+       map<string, int> barcodes;
+       //group name of each barcode, in the same order as the indices stored in barcodes
+       vector<string> groupVector;
+       //names of the output files, printed at the end of execute()
+       vector<string> outputNames;
+
+//     string stripSeqQual(string, int, int);
+//     string stripQualQual(string, int, int);
+       
+       string outputDir;
+       //true when execute() must not run (help requested or invalid parameters)
+       bool abort;
+};
+
+#endif
+
+
index 6e9e95c9f7eaf20f91ba312b93a52be67c82b828..84c3219822920e21bc20d89f2fdf6b4f58ead1f3 100644 (file)
@@ -18,10 +18,30 @@ inline bool compareQuanMembers(quanMember left, quanMember right){
 } 
 //***************************************************************************************************************
 
-Pintail::Pintail(string filename, string o) {  
-       fastafile = filename;  outputDir = o; 
-       distcalculator = new eachGapDist();
-       decalc = new DeCalculator();
+//Stores the query / template file names and run options, loads the template sequences, creates the
+//distance and DE calculators and finally runs doPrep().
+Pintail::Pintail(string filename, string temp, bool f, int p, string mask, string cons, string q, int win, int inc, string o) : Chimera() { 
+       try {
+               //query file, plus the template file and its sequences
+               fastafile = filename; 
+               templateFileName = temp;
+               templateSeqs = readSeqs(temp);
+               
+               //run options, assigned in the order they are passed in
+               filter = f;
+               processors = p;
+               setMask(mask);
+               consfile = cons;
+               quanfile = q;
+               window = win;
+               increment = inc; 
+               outputDir = o; 
+               
+               //calculators have to exist before doPrep() runs
+               distcalculator = new eachGapDist();
+               decalc = new DeCalculator();
+               
+               doPrep();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Pintail", "Pintail");
+               exit(1);
+       }
+
 }
 //***************************************************************************************************************
 
@@ -51,6 +71,9 @@ int Pintail::doPrep() {
                
                decalc->setMask(seqMask);
                
+       #ifdef USE_MPI
+               //do nothing
+       #else
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        //find breakup of templatefile for quantiles
                        if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
@@ -64,7 +87,7 @@ int Pintail::doPrep() {
                #else
                        templateLines.push_back(new linePair(0, templateSeqs.size()));
                #endif
-
+       #endif
                
                m->mothurOut("Getting conservation... "); cout.flush();
                if (consfile == "") { 
@@ -76,30 +99,15 @@ int Pintail::doPrep() {
                m->mothurOutEndLine();
                
                //make P into Q
-               for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
+               for (int i = 0; i < probabilityProfile.size(); i++)  { probabilityProfile[i] = 1 - probabilityProfile[i];  }  //
                
                bool reRead = false;
                //create filter if needed for later
                if (filter) {
                                                
                        //read in all query seqs
-                       ifstream in; 
-                       openInputFile(fastafile, in);
-                       
-                       vector<Sequence*> tempQuerySeqs;
-                       while(!in.eof()){
-                               if (m->control_pressed) {  
-                                       for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
-                                       return 0; 
-                               }
+                       vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
                                
-                               Sequence* s = new Sequence(in);
-                               gobble(in);
-                               
-                               if (s->getName() != "") { tempQuerySeqs.push_back(s); }
-                       }
-                       in.close();
-                       
                        vector<Sequence*> temp;
                        //merge query seqs and template seqs
                        temp = templateSeqs;
@@ -159,7 +167,6 @@ int Pintail::doPrep() {
                
                        if (m->control_pressed) {  return 0;  }
                        
-                       ofstream out4, out5;
                        string noOutliers, outliers;
                        
                        if ((!filter) && (seqMask == "")) {
@@ -175,8 +182,9 @@ int Pintail::doPrep() {
                        decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
                        
                        if (m->control_pressed) {  return 0;  }
+               
+                       string outputString = "";
                        
-                       openOutputFile(noOutliers, out5);                       
                        //adjust quantiles
                        for (int i = 0; i < quantilesMembers.size(); i++) {
                                vector<float> temp;
@@ -206,14 +214,16 @@ int Pintail::doPrep() {
                                }
                                
                                //output quan value
-                               out5 << i+1 << '\t';                            
-                               for (int u = 0; u < temp.size(); u++) {   out5 << temp[u] << '\t'; }
-                               out5 << endl;
+                               outputString += toString(i+1) + "\t";                           
+                               for (int u = 0; u < temp.size(); u++) {   outputString += toString(temp[u]) + "\t"; }
+                               outputString += "\n";
                                
                                quantiles[i] = temp;
                                
                        }
-
+                       
+                       printQuanFile(noOutliers, outputString);
+                       
                        m->mothurOut("Done."); m->mothurOutEndLine();
                }
                
@@ -274,7 +284,64 @@ int Pintail::print(ostream& out, ostream& outAcc) {
                exit(1);
        }
 }
+#ifdef USE_MPI
+//***************************************************************************************************************
+int Pintail::print(MPI_File& out, MPI_File& outAcc) {
+       try {
+               bool results = false;
+               string outputString = "";
+               int index = ceil(deviation);
+               
+               //is your DE value higher than the 95%
+               string chimera;
+               if (index != 0) {  //if index is 0 then its an exact match to a template seq
+                       if (quantiles[index][4] == 0.0) {
+                               chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
+                       }else {
+                               if (DE > quantiles[index][4])           {       chimera = "Yes";        }
+                               else                                                            {       chimera = "No";         }
+                       }
+               }else{ chimera = "No";          }
 
+               outputString += querySeq->getName() + "\tdiv: " + toString(deviation) + "\tstDev: " + toString(DE) + "\tchimera flag: " + chimera + "\n";
+               if (chimera == "Yes") {
+                       cout << querySeq->getName() << "\tdiv: " << toString(deviation) << "\tstDev: " << toString(DE) << "\tchimera flag: " << chimera << endl;
+                       string outAccString = querySeq->getName() + "\n";
+                       
+                       MPI_Status statusAcc;
+                       int length = outAccString.length();
+                       char buf[length];
+                       strcpy(buf, outAccString.c_str()); 
+                               
+                       MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
+                       
+                       results = true;
+               }
+               outputString += "Observed\t";
+               
+               for (int j = 0; j < obsDistance.size(); j++) {  outputString += toString(obsDistance[j]) + "\t";  }
+               outputString += "\n";
+               
+               outputString += "Expected\t";
+               
+               for (int m = 0; m < expectedDistance.size(); m++) {  outputString += toString(expectedDistance[m]) + "\t";  }
+               outputString += "\n";
+               
+               MPI_Status status;
+               int length = outputString.length();
+               char buf2[length];
+               strcpy(buf2, outputString.c_str()); 
+                               
+               MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
+               
+               return results;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Pintail", "print");
+               exit(1);
+       }
+}
+#endif
 //***************************************************************************************************************
 int Pintail::getChimeras(Sequence* query) {
        try {
@@ -346,16 +413,56 @@ int Pintail::getChimeras(Sequence* query) {
 
 vector<float> Pintail::readFreq() {
        try {
-       
-               ifstream in;
-               openInputFile(consfile, in);
-               
+               //read in probabilities and store in vector
+               int pos; float num; 
+
                vector<float> prob;
                set<int> h = decalc->getPos();  //positions of bases in masking sequence
                
-               //read in probabilities and store in vector
-               int pos; float num; 
+       #ifdef USE_MPI
+               
+               MPI_File inMPI;
+               MPI_Offset size;
+               MPI_Status status;
                
+               char inFileName[consfile.length()];
+               strcpy(inFileName, consfile.c_str());
+
+               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
+               MPI_File_get_size(inMPI, &size);
+
+               char buffer[size];
+               MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
+
+               string tempBuf = buffer;
+
+               if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
+               istringstream iss (tempBuf,istringstream::in);
+               
+               while(!iss.eof()) {
+                       iss >> pos >> num;
+       
+                       if (h.count(pos) > 0) {
+                               float Pi;
+                               Pi =  (num - 0.25) / 0.75; 
+                       
+                               //cannot have probability less than 0.
+                               if (Pi < 0) { Pi = 0.0; }
+
+                               //do you want this spot
+                               prob.push_back(Pi);  
+                       }
+                       
+                       gobble(iss);
+               }
+       
+               MPI_File_close(&inMPI);
+               
+       #else   
+
+               ifstream in;
+               openInputFile(consfile, in);
+                               
                while(!in.eof()){
                        
                        in >> pos >> num;
@@ -373,8 +480,10 @@ vector<float> Pintail::readFreq() {
                        
                        gobble(in);
                }
-               
                in.close();
+               
+       #endif
+       
                return prob;
                
        }
@@ -400,7 +509,7 @@ Sequence* Pintail::findPairs(Sequence* q) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+//**************************************************************************************************
 void Pintail::createProcessesQuan() {
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
@@ -464,13 +573,12 @@ void Pintail::createProcessesQuan() {
                                vector<quanMember> q;  float w; int b, n;
                                for (int j = 0; j < num; j++) {
                                        in >> w >> b >> n;
-       //cout << w << '\t' << b << '\t' n << endl;
+       
                                        quanMember newMember(w, b, n);
                                        q.push_back(newMember);
                                }
-//cout << "here" << endl;
+
                                quan[m] = q;
-//cout << "now here" << endl;
                                gobble(in);
                        }
                        
@@ -495,8 +603,134 @@ void Pintail::createProcessesQuan() {
                exit(1);
        }
 }
+//***************************************************************************************************************
+vector< vector<float> > Pintail::readQuantiles() {
+       try {
+               int num; 
+               float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
+               
+               vector< vector<float> > quan;
+               vector <float> temp; temp.resize(6, 0);
+               
+               //to fill 0
+               quan.push_back(temp); 
 
+       #ifdef USE_MPI
+               
+               MPI_File inMPI;
+               MPI_Offset size;
+               MPI_Status status;
+               
+               char inFileName[quanfile.length()];
+               strcpy(inFileName, quanfile.c_str());
+
+               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
+               MPI_File_get_size(inMPI, &size);
+
+               char buffer[size];
+               MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
+
+               string tempBuf = buffer;
+               if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
+               istringstream iss (tempBuf,istringstream::in);
+               
+               while(!iss.eof()) {
+                       iss >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
+                       
+                       temp.clear();
+                       
+                       temp.push_back(ten); 
+                       temp.push_back(twentyfive);
+                       temp.push_back(fifty);
+                       temp.push_back(seventyfive);
+                       temp.push_back(ninetyfive);
+                       temp.push_back(ninetynine);
+                       
+                       quan.push_back(temp);  
+                       
+                       gobble(iss);
+               }
+       
+               MPI_File_close(&inMPI);
+               
+       #else   
+
+               ifstream in;
+               openInputFile(quanfile, in);
+                       
+               while(!in.eof()){
+                       
+                       in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
+                       
+                       temp.clear();
+                       
+                       temp.push_back(ten); 
+                       temp.push_back(twentyfive);
+                       temp.push_back(fifty);
+                       temp.push_back(seventyfive);
+                       temp.push_back(ninetyfive);
+                       temp.push_back(ninetynine);
+                       
+                       quan.push_back(temp);  
+       
+                       gobble(in);
+               }
+               in.close();
+       #endif
+       
+               return quan;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Pintail", "readQuantiles");
+               exit(1);
+       }
+}
+//***************************************************************************************************************/
+
+void Pintail::printQuanFile(string file, string outputString) {
+       try {
+       
+               #ifdef USE_MPI
+               
+                       MPI_File outQuan;
+                       MPI_Status status;
+                       
+                       int pid;
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+                       
+                       char FileName[file.length()];
+                       strcpy(FileName, file.c_str());
+                       
+                       if (pid == 0) {
+                               MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outQuan);  //comm, filename, mode, info, filepointer
+                               
+                               int length = outputString.length();
+                               char buf[length];
+                               strcpy(buf, outputString.c_str()); 
+                                       
+                               MPI_File_write(outQuan, buf, length, MPI_CHAR, &status);
+                       
+                               MPI_File_close(&outQuan);
+                       }
+               #else
+                       ofstream outQuan;
+                       openOutputFile(file, outQuan);
+                       
+                       outQuan << outputString;
+                       
+                       outQuan.close();
+               #endif
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Pintail", "printQuanFile");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************/
 
-//***************************************************************************************************************
 
 
index 59d4feeec7fa3743a059b331db524b0df3fe48cf..7164842106d7b61d36883b351167163aeb4e878a 100644 (file)
--- a/pintail.h
+++ b/pintail.h
@@ -24,7 +24,7 @@
 class Pintail : public Chimera {
        
        public:
-               Pintail(string, string);        
+               Pintail(string, string, bool, int, string, string, string, int, int, string); //fastafile, templatefile, filter, processors, mask, conservation, quantile, window, increment, outputDir)        
                ~Pintail();
                
                int getChimeras(Sequence*);
@@ -33,13 +33,16 @@ class Pintail : public Chimera {
                void setCons(string c)          { consfile = c;  }
                void setQuantiles(string q) { quanfile = q;  }
                
+               #ifdef USE_MPI
+               int print(MPI_File&, MPI_File&);
+               #endif
                
        private:
        
                Dist* distcalculator;
                DeCalculator* decalc;
-               int iters;
-               string fastafile, consfile;
+               int iters, window, increment, processors;
+               string fastafile, quanfile, consfile;
                
                vector<linePair*> templateLines;
                Sequence* querySeq;
@@ -52,7 +55,7 @@ class Pintail : public Chimera {
                vector<int>  windowsForeachQuery;  // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window.
                                                                                //this is needed so you can move by bases and not just spots in the alignment
                                                                                
-               int windowSizes;                        //windowSizes = window size of query
+               int  windowSizes;                       //windowSizes = window size of query
                vector<int> windowSizesTemplate;    //windowSizesTemplate[0] = window size of templateSeqs[0]
                
                map<int, int> trimmed;    //trimmed = start and stop of trimmed sequences for query
@@ -67,12 +70,13 @@ class Pintail : public Chimera {
                set<int>  h;
                string mergedFilterString;
                
-               
+               vector< vector<float> > readQuantiles();
                vector<float> readFreq();
                Sequence* findPairs(Sequence*);
                        
                void createProcessesQuan();
                int doPrep();
+               void printQuanFile(string, string);
                
 };
 
index d58b0377cb48d1964b21479ca64278919dbf8cca..f61a40c483b69f7418ca4d2702ca995f4180f3e2 100644 (file)
-/*\r
- *  readcolumn.cpp\r
- *  Mothur\r
- *\r
- *  Created by Sarah Westcott on 4/21/09.\r
- *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.\r
- *\r
- */\r
-\r
-#include "readcolumn.h"\r
-#include "progress.hpp"\r
-\r
-/***********************************************************************/\r
-\r
-ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){\r
-       \r
-       successOpen = openInputFile(distFile, fileHandle);\r
-       \r
-}\r
-\r
-/***********************************************************************/\r
-\r
-int ReadColumnMatrix::read(NameAssignment* nameMap){\r
-       try {           \r
-\r
-               string firstName, secondName;\r
-               float distance;\r
-               int nseqs = nameMap->size();\r
-\r
-               list = new ListVector(nameMap->getListVector());\r
-       \r
-               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);\r
-\r
-               int lt = 1;\r
-               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose\r
-               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix\r
-\r
-               //need to see if this is a square or a triangular matrix...\r
-       \r
-               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...\r
-\r
-               \r
-                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance\r
-                       \r
-                       if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }\r
-       \r
-                       map<string,int>::iterator itA = nameMap->find(firstName);\r
-                       map<string,int>::iterator itB = nameMap->find(secondName);\r
-                       \r
-                       if(itA == nameMap->end()){\r
-                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);\r
-                       }\r
-                       if(itB == nameMap->end()){\r
-                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);\r
-                       }\r
-\r
-                       if (distance == -1) { distance = 1000000; }\r
-                       \r
-                       if(distance < cutoff && itA != itB){\r
-                               if(itA->second > itB->second){\r
-                                       PCell value(itA->second, itB->second, distance);\r
-                       \r
-                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
-                                               refRow = itA->second;\r
-                                               refCol = itB->second;\r
-                                               D->addCell(value);\r
-                                       }\r
-                                       else if(refRow == itA->second && refCol == itB->second){\r
-                                               lt = 0;\r
-                                       }\r
-                                       else{\r
-                                               D->addCell(value);\r
-                                       }\r
-                               }\r
-                               else if(itA->second < itB->second){\r
-                                       PCell value(itB->second, itA->second, distance);\r
-                       \r
-                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
-                                               refRow = itA->second;\r
-                                               refCol = itB->second;\r
-                                               D->addCell(value);\r
-                                       }\r
-                                       else if(refRow == itB->second && refCol == itA->second){\r
-                                               lt = 0;\r
-                                       }\r
-                                       else{\r
-                                               D->addCell(value);\r
-                                       }\r
-                               }\r
-                               reading->update(itA->second * nseqs);\r
-                       }\r
-                       gobble(fileHandle);\r
-               }\r
-\r
-               if(lt == 0){  // oops, it was square\r
-                       fileHandle.close();  //let's start over\r
-                       D->clear();  //let's start over\r
-                  \r
-                       openInputFile(distFile, fileHandle);  //let's start over\r
-\r
-                       while(fileHandle){\r
-                               fileHandle >> firstName >> secondName >> distance;\r
-                               \r
-                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }\r
-               \r
-                               map<string,int>::iterator itA = nameMap->find(firstName);\r
-                               map<string,int>::iterator itB = nameMap->find(secondName);\r
-                               \r
-                               if(itA == nameMap->end()){\r
-                                       cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";\r
-                               }\r
-                               if(itB == nameMap->end()){\r
-                                       cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";\r
-                               }\r
-                               \r
-                               if (distance == -1) { distance = 1000000; }\r
-                               \r
-                               if(distance < cutoff && itA->second > itB->second){\r
-                                       PCell value(itA->second, itB->second, distance);\r
-                                       D->addCell(value);\r
-                                       reading->update(itA->second * nseqs);\r
-                               }\r
-               \r
-                               gobble(fileHandle);\r
-                       }\r
-               }\r
-               \r
-               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }\r
-               \r
-               reading->finish();\r
-               fileHandle.close();\r
-\r
-               list->setLabel("0");\r
-               \r
-               return 1;\r
-\r
-       }\r
-       catch(exception& e) {\r
-               m->errorOut(e, "ReadColumnMatrix", "read");\r
-               exit(1);\r
-       }\r
-}\r
-\r
-/***********************************************************************/\r
-\r
-ReadColumnMatrix::~ReadColumnMatrix(){\r
-       //delete D;\r
-       //delete list;\r
-}\r
-\r
-\r
+/*
+ *  readcolumn.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 4/21/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "readcolumn.h"
+#include "progress.hpp"
+
+/***********************************************************************/
+
+ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
+       //remember the distance file name and open it on fileHandle; the value returned by openInputFile is kept in successOpen
+       successOpen = openInputFile(distFile, fileHandle);
+       
+}
+
+/***********************************************************************/
+
+int ReadColumnMatrix::read(NameAssignment* nameMap){
+       try {           
+
+               string firstName, secondName;
+               float distance;
+               int nseqs = nameMap->size();
+
+               list = new ListVector(nameMap->getListVector());
+       
+               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
+
+               int lt = 1;
+               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
+               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
+
+               //need to see if this is a square or a triangular matrix...
+       
+               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
+
+               
+                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
+                       
+                       if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+       
+                       map<string,int>::iterator itA = nameMap->find(firstName);
+                       map<string,int>::iterator itB = nameMap->find(secondName);
+                               
+                       if(itA == nameMap->end()){
+                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);
+                       }
+                       if(itB == nameMap->end()){
+                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);
+                       }
+
+                       if (distance == -1) { distance = 1000000; }
+                       else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                       
+                       if(distance < cutoff && itA != itB){
+                               if(itA->second > itB->second){
+                                       PCell value(itA->second, itB->second, distance);
+                       
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
+                                               refRow = itA->second;
+                                               refCol = itB->second;
+                                               D->addCell(value);
+                                       }
+                                       else if(refRow == itA->second && refCol == itB->second){
+                                               lt = 0;
+                                       }
+                                       else{
+                                               D->addCell(value);
+                                       }
+                               }
+                               else if(itA->second < itB->second){
+                                       PCell value(itB->second, itA->second, distance);
+                       
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
+                                               refRow = itA->second;
+                                               refCol = itB->second;
+                                               D->addCell(value);
+                                       }
+                                       else if(refRow == itB->second && refCol == itA->second){
+                                               lt = 0;
+                                       }
+                                       else{
+                                               D->addCell(value);
+                                       }
+                               }
+                               reading->update(itA->second * nseqs);
+                       }
+                       gobble(fileHandle);
+               }
+
+               if(lt == 0){  // oops, it was square
+       
+                       fileHandle.close();  //let's start over
+                       D->clear();  //let's start over
+                  
+                       openInputFile(distFile, fileHandle);  //let's start over
+
+                       while(fileHandle){
+                               fileHandle >> firstName >> secondName >> distance;
+                               
+                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+               
+                               map<string,int>::iterator itA = nameMap->find(firstName);
+                               map<string,int>::iterator itB = nameMap->find(secondName);
+                               
+                               if(itA == nameMap->end()){
+                                       cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
+                               }
+                               if(itB == nameMap->end()){
+                                       cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
+                               }
+                               
+                               if (distance == -1) { distance = 1000000; }
+                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                               
+                               if(distance < cutoff && itA->second > itB->second){
+                                       PCell value(itA->second, itB->second, distance);
+                                       D->addCell(value);
+                                       reading->update(itA->second * nseqs);
+                               }
+               
+                               gobble(fileHandle);
+                       }
+               }
+               
+               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+               
+               reading->finish();
+               fileHandle.close();
+
+               list->setLabel("0");
+               
+               return 1;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadColumnMatrix", "read");
+               exit(1);
+       }
+}
+
+/***********************************************************************/
+
+ReadColumnMatrix::~ReadColumnMatrix(){  //frees nothing: both deletes below are commented out. NOTE(review): presumably D and list are released by whoever uses them - confirm
+       //delete D;
+       //delete list;
+}
+
+
index 60d300f177f1deec1720ceeefec71d73ccd7579b..bcecb7844e8a0348180884fbdb54993c286c53ac 100644 (file)
@@ -22,7 +22,7 @@ ReadDistCommand::ReadDistCommand(string option) {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir"};
+                       string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -119,9 +119,13 @@ ReadDistCommand::ReadDistCommand(string option) {
                        // ...at some point should added some additional type checking...
                        //get user cutoff and precision or use defaults
                        string temp;
-                       temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
+                       temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
                        convert(temp, precision); 
                        
+                       temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
+                       sim = isTrue(temp); 
+                       globaldata->sim = sim;
+                       
                        temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
                        convert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));
@@ -166,11 +170,12 @@ ReadDistCommand::ReadDistCommand(string option) {
 
 void ReadDistCommand::help(){
        try {
-               m->mothurOut("The read.dist command parameter options are phylip or column, group, name, cutoff and precision\n");
+               m->mothurOut("The read.dist command parameter options are phylip or column, group, name, sim, cutoff and precision\n");
                m->mothurOut("The read.dist command can be used in two ways.  The first is to read a phylip or column and run the cluster command\n");
                m->mothurOut("For this use the read.dist command should be in the following format: \n");
                m->mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
                m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
+               m->mothurOut("The sim parameter is used to indicate that your distance file contains similiarity values instead of distance values. The default is false, if sim=true then mothur will convert the similairity values to distances. \n");
                m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
                m->mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
                m->mothurOut("For this use the read.dist command should be in the following format: \n");
@@ -204,7 +209,7 @@ int ReadDistCommand::execute(){
                size_t numDists = 0;
                
                vector<string> outputNames;
-cout << format << endl;                
+               
                if (format == "matrix") {
                        ifstream in;
                        openInputFile(distFileName, in);
index 1f852be88e84223a3f07b9847f71e57ee8160442..937ca3f0797c58bc24dc3a239ac5f7c0e4624339 100644 (file)
@@ -42,7 +42,7 @@ private:
        string phylipfile, columnfile, namefile, groupfile, outputDir;
        NameAssignment* nameMap;
 
-       bool abort;
+       bool abort, sim;
 
 };
 
index d4edb5b8ad866e10a15ce82ef22d22d418c68e84..31a4da49c8decf24b75b059db4890d377fa54aef 100644 (file)
@@ -21,7 +21,7 @@ class SparseMatrix;
 class ReadMatrix {
 
 public:
-       ReadMatrix(){   D = new SparseMatrix();  m = MothurOut::getInstance();  }
+       ReadMatrix(){   D = new SparseMatrix();  m = MothurOut::getInstance();   globaldata = GlobalData::getInstance(); }
        virtual ~ReadMatrix() {}
        virtual int read(NameAssignment*){ return 1; }
        
@@ -38,6 +38,7 @@ protected:
        GlobalData* globaldata;
        float cutoff;
        MothurOut* m;
+       bool sim;
 };
 
 
index edda41593deb0fd3ac908ea4ebd333dee8d03edc..f1554565bf9ac58a3e41afa6578742293bd0ea2c 100644 (file)
@@ -85,6 +85,7 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                        
                                                 
                                                         if (distance == -1) { distance = 1000000; }
+                                                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
                                                 
                                                         if(distance < cutoff){
                                                                 PCell value(i, j, distance);
@@ -104,6 +105,7 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                                                if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
                                 
                                                         if (distance == -1) { distance = 1000000; }
+                                                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
                                                         
                                                         if(distance < cutoff){
                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
@@ -135,6 +137,7 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                                                if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
                                                                                                                
                                                         if (distance == -1) { distance = 1000000; }
+                                                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
                                                         
                                                         if(distance < cutoff && j < i){
                                                                 PCell value(i, j, distance);
@@ -153,9 +156,10 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                                                                                                                
                                                                                                                if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
                                                                                                                
-                                                        if (distance == -1) { distance = 1000000; }
+                                                       if (distance == -1) { distance = 1000000; }
+                                                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.                                                        
                                                         
-                                                        if(distance < cutoff && j < i){
+                                                                                                               if(distance < cutoff && j < i){
                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
                                                                 D->addCell(value);
                                                         }
index 4b5667584a07fd1ca5f6832efd16c192e04c5f1c..19adf796b5d1d26f3a681144c6c801cdb4b4f63d 100644 (file)
@@ -443,5 +443,65 @@ void Sequence::reverseComplement(){
        aligned = temp;
        
 }
-
+#ifdef USE_MPI 
 //********************************************************************************************************************
+//Sends this sequence to MPI rank 'receiver' as four tag-2001 messages on MPI_COMM_WORLD:
+//name length (int), name chars, aligned length (int), aligned chars. Returns 0; on a caught
+//exception the error is reported through m->errorOut and the process exits.
+int Sequence::MPISend(int receiver) {
+       try {
+               //send name - string (exactly length chars, no terminating '\0' on the wire)
+               int length = name.length();
+               
+               MPI_Send(&length, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
+
+               //send straight from the string's own storage: no fixed-size copy, so nothing can be
+               //written past a buffer end. const_cast only because older MPI headers take void*.
+               MPI_Send(const_cast<char*>(name.c_str()), length, MPI_CHAR, receiver, 2001, MPI_COMM_WORLD);
+       
+               //send aligned - string
+               length = aligned.length();
+               MPI_Send(&length, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
+       
+               MPI_Send(const_cast<char*>(aligned.c_str()), length, MPI_CHAR, receiver, 2001, MPI_COMM_WORLD);
+       
+               return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "MPISend");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//Receives a sequence sent by Sequence::MPISend from MPI rank 'sender': for name and then aligned,
+//an int length followed by that many chars (no '\0' is transmitted). Returns 0; on a caught
+//exception the error is reported through m->errorOut and the process exits.
+int Sequence::MPIRecv(int sender) {
+       try {
+               MPI_Status status;
+               //receive name - string
+               int length;
+               MPI_Recv(&length, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
+       
+               vector<char> buf(length + 1, '\0');  //one spare slot keeps &buf[0] valid when length is 0
+               MPI_Recv(&buf[0], length, MPI_CHAR, sender, 2001, MPI_COMM_WORLD, &status);
+               name.assign(&buf[0], length);  //copy exactly length chars; the payload carries no terminator
+               
+               //receive aligned - string
+               MPI_Recv(&length, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
+       
+               vector<char> buf2(length + 1, '\0');
+               MPI_Recv(&buf2[0], length, MPI_CHAR, sender, 2001, MPI_COMM_WORLD, &status);
+               aligned.assign(&buf2[0], length);
+               
+               setAligned(aligned);
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "MPIRecv");
+               exit(1);
+       }
+}
+#endif
+/**************************************************************************************************/
index 5f84d441c20d0fd8a0ed8f9284b7a32dfc9b4214..21b4c3874c9ccb1f5ab9eab31fe78fbd7f0a40c6 100644 (file)
@@ -47,6 +47,9 @@ public:
        bool getIsAligned();
        void printSequence(ostream&);
        
+       int MPISend(int); //not working at the moment...
+       int MPIRecv(int); //not working at the moment...
+       
 private:
        MothurOut* m;
        void initialize();
index 2aa8f3f3c04e734acb1d15406f324522b24ac2f0..b3496f60703ae6cadf09ad8159fdd4fa1fbf27e6 100644 (file)
@@ -26,6 +26,12 @@ SuffixDB::SuffixDB(int numSeqs) : Database() {
        suffixForest.resize(numSeqs);
        count = 0;
 }
+/**************************************************************************************************/
+
+SuffixDB::SuffixDB() : Database() {  //default constructor: suffixForest is left empty here (SuffixDB::MPIRecv resizes it to the received numSeqs)
+       count = 0;
+}
+
 /**************************************************************************************************/
 //assumes sequences have been added using addSequence
 vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
@@ -76,4 +82,36 @@ void SuffixDB::addSequence(Sequence seq) {
 SuffixDB::~SuffixDB(){                                                                                                         
        for (int i = (suffixForest.size()-1); i >= 0; i--) {  suffixForest.pop_back();  }
 }
+#ifdef USE_MPI 
+/**************************************************************************************************/
+int SuffixDB::MPISend(int receiver) {
+       try {
+               const int numSeqsTag = 2001;  //message tag; SuffixDB::MPIRecv receives with the same value
+               //only numSeqs travels: a single MPI_INT sent to 'receiver' on MPI_COMM_WORLD
+               MPI_Send(&numSeqs, 1, MPI_INT, receiver, numSeqsTag, MPI_COMM_WORLD); 
+                                                                       
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SuffixDB", "MPISend");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int SuffixDB::MPIRecv(int sender) {
+       try {
+               const int numSeqsTag = 2001;  //must match the tag SuffixDB::MPISend sends with
+               MPI_Status recvStatus;  //filled in by MPI_Recv, not examined here
+               MPI_Recv(&numSeqs, 1, MPI_INT, sender, numSeqsTag, MPI_COMM_WORLD, &recvStatus);
+               //size the forest to the sequence count that was just received
+               suffixForest.resize(numSeqs);
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SuffixDB", "MPIRecv");
+               exit(1);
+       }
+}
+#endif 
 /**************************************************************************************************/
index 1baa99e8d09fbcaa3a52abefb03811d6c3a3ed5f..4dc7e0fadd0cc1575250da05029d3ef32b388894 100644 (file)
@@ -27,11 +27,17 @@ class SuffixDB : public Database {
        
 public:
        SuffixDB(int);
+       SuffixDB();
        ~SuffixDB();
        
        void generateDB() {}; //adding sequences generates the db
        void addSequence(Sequence);
        vector<int> findClosestSequences(Sequence*, int);
+       
+       #ifdef USE_MPI  
+       int MPISend(int); //just sends numSeqs
+       int MPIRecv(int);
+       #endif
 
 private:
        vector<SuffixTree> suffixForest;
index 0c32a3e1c7b00e9fa90c8a5e4ca7fb759238c9e2..2d29ee3f668ee1817cb71c82189c2a8c6f61cf00 100644 (file)
@@ -207,11 +207,23 @@ string ValidParameters::validFile(map<string, string> container, string paramete
                if(it != container.end()){ //no parameter given
 
                        if(isFile == true) {
+                       
+                       #ifdef USE_MPI  
+                               int pid;
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                               if (pid == 0) {
+                       #endif
 
                                ableToOpen = openInputFile(it->second, in);
 
                                if (ableToOpen == 1) { return "not open"; }
                                in.close();
+                               
+                       #ifdef USE_MPI  
+                               }
+                       #endif
+
                        }
                }else { return "not found"; }