]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraperseuscommand.cpp
working on mgcluster count file change
[mothur.git] / chimeraperseuscommand.cpp
index 240ba65adf628099e8234ed7c8c7a692913d71ee..e3691e8b942c2f34da91cec14691e717d5c16c1a 100644 (file)
@@ -39,7 +39,7 @@ string ChimeraPerseusCommand::getHelpString(){
                helpString += "The chimera.perseus command reads a fastafile and namefile and outputs potentially chimeric sequences.\n";
                helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
-               helpString += "The name parameter allows you to provide a name file associated with your fasta file. If none is given unique.seqs will be run to generate one. \n";
+               helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
@@ -58,6 +58,27 @@ string ChimeraPerseusCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){  
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "chimera") {  outputFileName =  "perseus.chimeras"; }
+            else if (type == "accnos") {  outputFileName =  "perseus.accnos"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 ChimeraPerseusCommand::ChimeraPerseusCommand(){        
        try {
                abort = true; calledHelp = true;
@@ -345,16 +366,16 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        
                        string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
-                       convert(temp, cutoff);
+                       m->mothurConvert(temp, cutoff);
                        
                        temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
-                       convert(temp, alpha);
+                       m->mothurConvert(temp, alpha);
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
-                       convert(temp, beta);
+                       m->mothurConvert(temp, beta);
                }
        }
        catch(exception& e) {
@@ -376,8 +397,9 @@ int ChimeraPerseusCommand::execute(){
                        
                        int start = time(NULL); 
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera";
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "perseus.accnos";
+                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+
                        //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                        
                        //you provided a groupfile
@@ -466,14 +488,15 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){
                string inputString = "fasta=" + inputFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                nameFile = filenames["name"][0];
@@ -533,6 +556,7 @@ vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, str
                
                vector<seqData> sequences;
                bool error = false;
+        alignLength = 0;
                
                for (int i = 0; i < thisGroupsSeqs.size(); i++) {
                
@@ -543,6 +567,7 @@ vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, str
                        else {
                                int num = m->getNumNames(it->second);
                                sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
+                if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
                        }
                }
                
@@ -570,7 +595,8 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                bool error = false;
                ifstream in;
                m->openInputFile(inputFile, in);
-               
+               alignLength = 0;
+        
                while (!in.eof()) {
                        
                        if (m->control_pressed) { in.close(); return sequences; }
@@ -581,6 +607,7 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                        if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
                        else {
                                sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
+                if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
                        }
                }
                in.close();
@@ -625,7 +652,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                }
                
                int numSeqs = sequences.size();
-               int alignLength = sequences[0].sequence.size();
+               //int alignLength = sequences[0].sequence.size();
                
                ofstream chimeraFile;
                ofstream accnosFile;
@@ -640,9 +667,8 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                vector<bool> chimeras(numSeqs, 0);
                
                for(int i=0;i<numSeqs;i++){     
-                       cout << sequences[i].seqName << endl << sequences[i].sequence << endl << sequences[i].frequency << endl;
                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
-                       
+    
                        vector<bool> restricted = chimeras;
                        
                        vector<vector<int> > leftDiffs(numSeqs);
@@ -657,16 +683,16 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                        vector<pwAlign> alignments(numSeqs);
                        
                        int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
-                       cout << "comparisons = " << comparisons << endl;
                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                        int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
                        
                        string dummyA, dummyB;
                        
-                       if(comparisons >= 2){   
+            if (sequences[i].sequence.size() < 3) { 
+                chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
+            }else if(comparisons >= 2){        
                                minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
-                               cout << "minMismatchToChimera = " << minMismatchToChimera << endl;
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                int minMismatchToTrimera = numeric_limits<int>::max();
@@ -674,12 +700,11 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                                
                                if(minMismatchToChimera >= 3 && comparisons >= 3){
                                        minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
-                                       cout << "minMismatchToTrimera = " << minMismatchToTrimera << endl;
                                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
                                }
                                
                                double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
-                               cout << "singleDist = " << singleDist << endl;
+                               
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                string type;
@@ -693,16 +718,16 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                                        type = "chimera";
                                        chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
                                }
-                               cout << "chimeraRefSeq = " << chimeraRefSeq << endl;
+                               ;
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
                                
                                double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
-                               cout << "chimeraDist = " << chimeraDist << endl;
+                               
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
                                double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
-                               cout << "loonIndex = " << loonIndex << endl;
+                               
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
@@ -766,7 +791,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -841,7 +866,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-               cout << "here" << endl; 
+                       
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                        num += pDataArray[i]->count;