git.donarmstrong.com Git - mothur.git/blobdiff - sffinfocommand.cpp
added sparseDistanceMatrix class. Modified cluster commands to use the new sparse...
[mothur.git] / sffinfocommand.cpp
index e008ce47d2153530af1c0ab09a585a94d4963662..08cf21e5d6b543684cfebe56c0cdaf8697139125 100644 (file)
@@ -55,6 +55,29 @@ string SffInfoCommand::getHelpString(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+string SffInfoCommand::getOutputFileNameTag(string type, string inputName=""){
+       // Translate an output type into the tag appended to the output file name; "" is returned whenever an error is logged.
+       try {
+               // Guard: the type must be one of the keys present in outputTypes.
+               if (outputTypes.count(type) == 0) {
+                       m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+                       return "";
+               }
+               if (type == "fasta")  { return "fasta";   }
+               if (type == "flow")   { return "flow";    }
+               if (type == "sfftxt") { return "sff.txt"; }
+               if (type == "qfile")  { return "qual";    }
+               // Known type without a tag definition: report it and set m->control_pressed.
+               m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+               m->control_pressed = true;
+               return "";
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
 
 
 //**********************************************************************************************************************
@@ -298,7 +321,6 @@ SffInfoCommand::SffInfoCommand(string option)  {
 //**********************************************************************************************************************
 int SffInfoCommand::execute(){
        try {
-               
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                for (int s = 0; s < filenames.size(); s++) {
@@ -307,6 +329,7 @@ int SffInfoCommand::execute(){
                        
                        int start = time(NULL);
                        
+            filenames[s] = m->getFullPathName(filenames[s]);
                        m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
                        
                        string accnos = "";
@@ -362,14 +385,17 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
 
                ofstream outSfftxt, outFasta, outQual, outFlow;
                string outFastaFileName, outQualFileName;
-               string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
-               string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
+        string rootName = outputDir + m->getRootName(m->getSimpleName(input));
+        if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
+        
+               string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("sfftxt");
+               string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("flow");
                if (trim) {
-                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
-                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
+                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("fasta");
+                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("qfile");
                }else{
-                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
-                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
+                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("fasta");
+                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("qfile");
                }
                
                if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
@@ -406,7 +432,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
                        //read data
                        seqRead read; 
                        readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
-                               
+            bool okay = sanityCheck(readheader, read);
+            if (!okay) { break; }
+            
                        //if you have provided an accosfile and this seq is not in it, then dont print
                        if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
                        
@@ -609,7 +637,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i
                                in.read(buffer, 2);
                                read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
                        }
-       
+            
                        //read flowIndex
                        read.flowIndex.resize(numBases);
                        for (int i = 0; i < numBases; i++) {  
@@ -741,11 +769,39 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) {
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {
+       // Returns true when neither clip point exceeds the bases or quality scores read; otherwise logs a warning and returns false.
+       try {
+               string problems = "";  // one line per failed check; stays empty when every check passes
+               // clip points compared with the number of bases read
+               if (read.bases.length() < header.clipQualLeft) {
+                       problems += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
+               }
+               if (read.bases.length() < header.clipQualRight) {
+                       problems += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
+               }
+               // clip points compared with the number of quality scores read
+               if (read.qualScores.size() < header.clipQualLeft) {
+                       problems += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
+               }
+               if (read.qualScores.size() < header.clipQualRight) {
+                       problems += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
+               }
+               if (problems.empty()) { return true; }
+               // At least one check failed: print the warning header followed by the collected lines.
+               m->mothurOut("[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n" + problems);
+               m->mothurOutEndLine();
+               return false;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "sanityCheck");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
-               
                out << "Flowgram: ";
                for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
                
@@ -775,10 +831,9 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea
 //**********************************************************************************************************************
 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
-               
                string seq = read.bases;
                
-               if (trim) {
+        if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
                                seq = "NNNN";
                        }
@@ -908,17 +963,17 @@ int SffInfoCommand::parseSffTxt() {
                        fileRoot = m->getRootName(fileRoot);
                }
                
-               string outFlowFileName = outputDir + fileRoot + "flow";
+               string outFlowFileName = outputDir + fileRoot + getOutputFileNameTag("flow");
                if (trim) {
-                       outFastaFileName = outputDir + fileRoot + "fasta";
-                       outQualFileName = outputDir + fileRoot + "qual";
+                       outFastaFileName = outputDir + fileRoot + getOutputFileNameTag("fasta");
+                       outQualFileName = outputDir + fileRoot + getOutputFileNameTag("qfile");
                }else{
-                       outFastaFileName = outputDir + fileRoot + "raw.fasta";
-                       outQualFileName = outputDir + fileRoot + "raw.qual";
+                       outFastaFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("fasta");
+                       outQualFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("qfile");
                }
                
                if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
-               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
+               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
                if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
                
                //read common header