git.donarmstrong.com Git - mothur.git/blobdiff - aligncommand.cpp
working on chimera change to add trim feature, fixed bug in print of distance file...
[mothur.git] / aligncommand.cpp
index 3f70e2d3733a2d40da2012d8ba31bff61c768b9b..b6a2c0bcd46b7bfeaf48ad548dea6dec5ca3a823 100644 (file)
 
 
 //**********************************************************************************************************************
-
+vector<string> AlignCommand::getValidParameters(){     // Returns the fixed list of parameter names declared below, as a vector<string>.
+       try {
+               string AlignArray[] =  {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors","flip","threshold","outputdir","inputdir"};
+               vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); // range copy; element count = total array bytes / bytes per string
+               return myArray;
+       }
+       catch(exception& e) { // any std::exception is reported through m->errorOut, tagged with class and method name
+               m->errorOut(e, "AlignCommand", "getValidParameters");
+               exit(1); // terminates the process, so no value is returned on this path
+       }
+}
+//**********************************************************************************************************************
+vector<string> AlignCommand::getRequiredParameters(){  // Returns a vector<string> holding exactly "template" and "candidate".
+       try {
+               string AlignArray[] =  {"template","candidate"}; // the same two names the help() text describes as required
+               vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); // range copy; element count = total array bytes / bytes per string
+               return myArray;
+       }
+       catch(exception& e) { // any std::exception is reported through m->errorOut, tagged with class and method name
+               m->errorOut(e, "AlignCommand", "getRequiredParameters");
+               exit(1); // terminates the process, so no value is returned on this path
+       }
+}
+//**********************************************************************************************************************
+vector<string> AlignCommand::getRequiredFiles(){       // Always returns an empty vector<string>.
+       try {
+               vector<string> myArray; // default-constructed and never filled
+               return myArray;
+       }
+       catch(exception& e) { // any std::exception is reported through m->errorOut, tagged with class and method name
+               m->errorOut(e, "AlignCommand", "getRequiredFiles");
+               exit(1); // terminates the process, so no value is returned on this path
+       }
+}
+//**********************************************************************************************************************
+AlignCommand::AlignCommand(){  // Default constructor: sets abort to true and registers three empty output-type lists; parses no options.
+       try {
+               abort = true; // NOTE(review): presumably marks this instance as not runnable (the string constructor starts from abort = false) - confirm against execute()
+               //initialize outputTypes: each key below is mapped to an empty vector<string>
+               vector<string> tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["alignreport"] = tempOutNames;
+               outputTypes["accnos"] = tempOutNames;
+       }
+       catch(exception& e) { // any std::exception is reported through m->errorOut, tagged with class and method name
+               m->errorOut(e, "AlignCommand", "AlignCommand");
+               exit(1); // terminates the process
+       }
+}
+//**********************************************************************************************************************
 AlignCommand::AlignCommand(string option)  {
        try {
-               
                abort = false;
        
                //allow user to run help
@@ -52,7 +100,13 @@ AlignCommand::AlignCommand(string option)  {
                        for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
-
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["alignreport"] = tempOutNames;
+                       outputTypes["accnos"] = tempOutNames;
+                       
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
                        
@@ -68,7 +122,7 @@ AlignCommand::AlignCommand(string option)  {
 
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["template"] = inputDir + it->second;         }
                                }
@@ -86,31 +140,48 @@ AlignCommand::AlignCommand(string option)  {
                        candidateFileName = validParameter.validFile(parameters, "candidate", false);
                        if (candidateFileName == "not found") { m->mothurOut("candidate is a required parameter for the align.seqs command."); m->mothurOutEndLine(); abort = true;  }
                        else { 
-                               splitAtDash(candidateFileName, candidateFileNames);
+                               m->splitAtDash(candidateFileName, candidateFileNames);
                                
                                //go through files and make sure they are good, if not, then disregard them
                                for (int i = 0; i < candidateFileNames.size(); i++) {
+                                       //candidateFileNames[i] = m->getFullPathName(candidateFileNames[i]);
+                                       
                                        if (inputDir != "") {
-                                               string path = hasPath(candidateFileNames[i]);
+                                               string path = m->hasPath(candidateFileNames[i]);
                                                //if the user has not given a path then, add inputdir. else leave path alone.
                                                if (path == "") {       candidateFileNames[i] = inputDir + candidateFileNames[i];               }
                                        }
        
                                        int ableToOpen;
                                        ifstream in;
-
-                                       ableToOpen = openInputFile(candidateFileNames[i], in, "noerror");
-                               
+                                       ableToOpen = m->openInputFile(candidateFileNames[i], in, "noerror");
+                                       in.close();     
+                                       
                                        //if you can't open it, try default location
                                        if (ableToOpen == 1) {
                                                if (m->getDefaultPath() != "") { //default path is set
-                                                       string tryPath = m->getDefaultPath() + getSimpleName(candidateFileNames[i]);
+                                                       string tryPath = m->getDefaultPath() + m->getSimpleName(candidateFileNames[i]);
                                                        m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       ifstream in2;
+                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                       in2.close();
                                                        candidateFileNames[i] = tryPath;
                                                }
                                        }
-                                       in.close();                                     
+                                       
+                                       //if you can't open it, try default location
+                                       if (ableToOpen == 1) {
+                                               if (m->getOutputDir() != "") { //default path is set
+                                                       string tryPath = m->getOutputDir() + m->getSimpleName(candidateFileNames[i]);
+                                                       m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                       ifstream in2;
+                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                       in2.close();
+                                                       candidateFileNames[i] = tryPath;
+                                               }
+                                       }
+                                       
+                                                                       
 
                                        if (ableToOpen == 1) { 
                                                m->mothurOut("Unable to open " + candidateFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
@@ -147,7 +218,7 @@ AlignCommand::AlignCommand(string option)  {
                        convert(temp, processors); 
                        
                        temp = validParameter.validFile(parameters, "flip", false);                     if (temp == "not found"){       temp = "f";                             }
-                       flip = isTrue(temp); 
+                       flip = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found"){       temp = "0.50";                  }
                        convert(temp, threshold); 
@@ -163,7 +234,6 @@ AlignCommand::AlignCommand(string option)  {
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 
 AlignCommand::~AlignCommand(){ 
@@ -180,7 +250,7 @@ AlignCommand::~AlignCommand(){
 void AlignCommand::help(){
        try {
                m->mothurOut("The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n");
-               m->mothurOut("The align.seqs command parameters are template, candidate, search, ksize, align, match, mismatch, gapopen and gapextend.\n");
+               m->mothurOut("The align.seqs command parameters are template, candidate, search, ksize, align, match, mismatch, gapopen, gapextend and processors.\n");
                m->mothurOut("The template and candidate parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
                m->mothurOut("The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer and blast. The default is kmer.\n");
                m->mothurOut("The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
@@ -223,17 +293,16 @@ int AlignCommand::execute(){
                        m->mothurOutEndLine();
                        alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
                }
-               vector<string> outputNames;
                
                for (int s = 0; s < candidateFileNames.size(); s++) {
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { outputTypes.clear(); return 0; }
                        
                        m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine();
                        
-                       if (outputDir == "") {  outputDir += hasPath(candidateFileNames[s]); }
-                       string alignFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align";
-                       string reportFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align.report";
-                       string accnosFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "flip.accnos";
+                       if (outputDir == "") {  outputDir += m->hasPath(candidateFileNames[s]); }
+                       string alignFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align";
+                       string reportFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align.report";
+                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "flip.accnos";
                        bool hasAccnos = true;
                        
                        int numFastaSeqs = 0;
@@ -241,7 +310,7 @@ int AlignCommand::execute(){
                        int start = time(NULL);
                
 #ifdef USE_MPI 
-                               int pid, end, numSeqsPerProcessor; 
+                               int pid, numSeqsPerProcessor; 
                                int tag = 2001;
                                vector<unsigned long int> MPIPos;
                                MPIWroteAccnos = false;
@@ -275,11 +344,11 @@ int AlignCommand::execute(){
                                MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport);
                                MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                                
-                               if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); return 0; }
+                               if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }
                                
                                if (pid == 0) { //you are the root process 
                                        
-                                       MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
+                                       MPIPos = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
                                        
                                        //send file positions to all processes
                                        for(int i = 1; i < processors; i++) { 
@@ -295,7 +364,7 @@ int AlignCommand::execute(){
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
                                        
-                                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); return 0; }
+                                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }
 
                                        for (int i = 1; i < processors; i++) {
                                                bool tempResult;
@@ -317,7 +386,7 @@ int AlignCommand::execute(){
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
                                        
-                                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); return 0; }
+                                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }
 
                                        MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
                                }
@@ -346,8 +415,8 @@ int AlignCommand::execute(){
                                }
                                
 #else
-               vector<unsigned long int> positions = divideFile(candidateFileNames[s], processors);
-                               
+
+               vector<unsigned long int> positions = m->divideFile(candidateFileNames[s], processors);
                for (int i = 0; i < (positions.size()-1); i++) {
                        lines.push_back(new linePair(positions[i], positions[(i+1)]));
                }       
@@ -355,10 +424,10 @@ int AlignCommand::execute(){
                        if(processors == 1){
                                numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
                                
-                               if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
+                               if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); outputTypes.clear(); return 0; }
                                
                                //delete accnos file if its blank else report to user
-                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+                               if (m->isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
                                else { 
                                        m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
                                        if (!flip) {
@@ -386,7 +455,7 @@ int AlignCommand::execute(){
                                vector<string> nonBlankAccnosFiles;
                                //delete blank accnos files generated with multiple processes
                                for(int i=0;i<processors;i++){  
-                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+                                       if (!(m->isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
                                                nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
                                        }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
                                }
@@ -406,15 +475,15 @@ int AlignCommand::execute(){
                                        m->mothurOutEndLine();
                                }else{ hasAccnos = false;  }
                                
-                               if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
+                               if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); outputTypes.clear(); return 0; }
                        }
        #else
                        numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
                        
-                       if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
+                       if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); outputTypes.clear();  return 0; }
                        
                        //delete accnos file if its blank else report to user
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+                       if (m->isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
                        else { 
                                m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
                                if (!flip) {
@@ -434,9 +503,9 @@ int AlignCommand::execute(){
                        if (pid == 0) { //only one process should output to screen
                #endif
 
-                       outputNames.push_back(alignFileName);
-                       outputNames.push_back(reportFileName);
-                       if (hasAccnos)  {       outputNames.push_back(accnosFileName);          }
+                       outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName);
+                       outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName);
+                       if (hasAccnos)  {       outputNames.push_back(accnosFileName);  outputTypes["accnos"].push_back(accnosFileName);  }
                        
                #ifdef USE_MPI
                        }
@@ -466,15 +535,15 @@ int AlignCommand::execute(){
 int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){
        try {
                ofstream alignmentFile;
-               openOutputFile(alignFName, alignmentFile);
+               m->openOutputFile(alignFName, alignmentFile);
                
                ofstream accnosFile;
-               openOutputFile(accnosFName, accnosFile);
+               m->openOutputFile(accnosFName, accnosFile);
                
                NastReport report(reportFName);
                
                ifstream inFASTA;
-               openInputFile(filename, inFASTA);
+               m->openInputFile(filename, inFASTA);
 
                inFASTA.seekg(filePos->start);
 
@@ -485,8 +554,9 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        
                        if (m->control_pressed) {  return 0; }
                        
-                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
-       
+                       Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
+                       report.setCandidate(candidateSeq);
+
                        int origNumBases = candidateSeq->getNumBases();
                        string originalUnaligned = candidateSeq->getUnaligned();
                        int numBasesNeeded = origNumBases * threshold;
@@ -498,10 +568,11 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                                                
                                Sequence temp = templateDB->findClosestSequence(candidateSeq);
                                Sequence* templateSeq = &temp;
-                               
+                       
                                float searchScore = templateDB->getSearchScore();
                                                                
                                Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+               
                                Sequence* copy;
                                
                                Nast* nast2;
@@ -516,6 +587,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                        string wasBetter =  "";
                                        //if the user wants you to try the reverse
                                        if (flip) {
+                               
                                                //get reverse compliment
                                                copy = new Sequence(candidateSeq->getName(), originalUnaligned);
                                                copy->reverseComplement();
@@ -547,7 +619,6 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                        accnosFile << candidateSeq->getName() << wasBetter << endl;
                                }
                                
-                               report.setCandidate(candidateSeq);
                                report.setTemplate(templateSeq);
                                report.setSearchParameters(search, searchScore);
                                report.setAlignmentParameters(align, alignment);
@@ -563,8 +634,12 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        }
                        delete candidateSeq;
                        
-                       unsigned long int pos = inFASTA.tellg();
-                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               unsigned long int pos = inFASTA.tellg();
+                               if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       #else
+                               if (inFASTA.eof()) { break; }
+                       #endif
                        
                        //report progress
                        if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
@@ -631,7 +706,8 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                        istringstream iss (tempBuf,istringstream::in);
 
                        Sequence* candidateSeq = new Sequence(iss);  
-       
+                       report.setCandidate(candidateSeq);
+
                        int origNumBases = candidateSeq->getNumBases();
                        string originalUnaligned = candidateSeq->getUnaligned();
                        int numBasesNeeded = origNumBases * threshold;
@@ -680,6 +756,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                                        delete nast;
                                                        nast = nast2;
                                                        needToDeleteCopy = true;
+                                                       wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
                                                }else{  
                                                        wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
                                                        delete nast2;
@@ -701,7 +778,6 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                        MPIWroteAccnos = true;
                                }
                                
-                               report.setCandidate(candidateSeq);
                                report.setTemplate(templateSeq);
                                report.setSearchParameters(search, searchScore);
                                report.setAlignmentParameters(align, alignment);
@@ -768,13 +844,17 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                                
                                //pass numSeqs to parent
                                ofstream out;
-                               string tempFile = toString(getpid()) + ".temp";
-                               openOutputFile(tempFile, out);
+                               string tempFile = alignFileName + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
                                out << num << endl;
                                out.close();
                                
                                exit(0);
-                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
                }
                
                //force parent to wait until all the processes are done
@@ -785,8 +865,8 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                
                for (int i = 0; i < processIDS.size(); i++) {
                        ifstream in;
-                       string tempFile =  toString(processIDS[i]) + ".temp";
-                       openInputFile(tempFile, in);
+                       string tempFile =  alignFileName + toString(processIDS[i]) + ".num.temp";
+                       m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
                        in.close(); remove(tempFile.c_str());
                }
@@ -806,8 +886,8 @@ void AlignCommand::appendAlignFiles(string temp, string filename) {
                
                ofstream output;
                ifstream input;
-               openOutputFileAppend(filename, output);
-               openInputFile(temp, input);
+               m->openOutputFileAppend(filename, output);
+               m->openInputFile(temp, input);
                
                while(char c = input.get()){
                        if(input.eof())         {       break;                  }
@@ -829,8 +909,8 @@ void AlignCommand::appendReportFiles(string temp, string filename) {
                
                ofstream output;
                ifstream input;
-               openOutputFileAppend(filename, output);
-               openInputFile(temp, input);
+               m->openOutputFileAppend(filename, output);
+               m->openInputFile(temp, input);
 
                while (!input.eof())    {       char c = input.get(); if (c == 10 || c == 13){  break;  }       } // get header line