]> git.donarmstrong.com Git - mothur.git/blobdiff - sffinfocommand.cpp
modified trim.seqs to speed up the append with multiple processors
[mothur.git] / sffinfocommand.cpp
index bbfcfd4510443de7edc42b7527083a52fc032728..66409e88f9013abd8deb9fd2ba78f6bfdf249a38 100644 (file)
 #include "sffinfocommand.h"
 #include "endiannessmacros.h"
 
+//**********************************************************************************************************************
+//returns the name of every parameter the sffinfo command accepts, each listed exactly once
+vector<string> SffInfoCommand::getValidParameters(){   
+       try {
+               string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir"};
+               //build the vector from the array; sizeof(Array)/sizeof(string) is the element count
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               //report the exception through m->errorOut, then terminate the process
+               m->errorOut(e, "SffInfoCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//default constructor: flags the command as aborted and registers an empty
+//file list under each output type key (fasta, flow, sfftxt, qual)
+SffInfoCommand::SffInfoCommand(){
+       try {
+               abort = true;
+
+               //every output type starts out with no file names
+               string typeKeys[] = {"fasta", "flow", "sfftxt", "qual"};
+               int numKeys = sizeof(typeKeys)/sizeof(string);
+               for (int k = 0; k < numKeys; k++) {
+                       outputTypes[typeKeys[k]] = vector<string>();
+               }
+       }
+       catch(exception& e) {
+               //report the exception through m->errorOut, then terminate the process
+               m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//returns the parameters that must be supplied to the command: only "sff"
+vector<string> SffInfoCommand::getRequiredParameters(){
+       try {
+               vector<string> required;
+               required.push_back("sff");
+               return required;
+       }
+       catch(exception& e) {
+               //report the exception through m->errorOut, then terminate the process
+               m->errorOut(e, "SffInfoCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//returns the list of required files, which is always empty for this command
+vector<string> SffInfoCommand::getRequiredFiles(){
+       try {
+               return vector<string>();
+       }
+       catch(exception& e) {
+               //report the exception through m->errorOut, then terminate the process
+               m->errorOut(e, "SffInfoCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 
 SffInfoCommand::SffInfoCommand(string option)  {
@@ -34,6 +85,13 @@ SffInfoCommand::SffInfoCommand(string option)  {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["flow"] = tempOutNames;
+                       outputTypes["sfftxt"] = tempOutNames;
+                       outputTypes["qual"] = tempOutNames;
+                       
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
                        
@@ -43,28 +101,43 @@ SffInfoCommand::SffInfoCommand(string option)  {
                        sffFilename = validParameter.validFile(parameters, "sff", false);
                        if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true;  }
                        else { 
-                               splitAtDash(sffFilename, filenames);
+                               m->splitAtDash(sffFilename, filenames);
                                
                                //go through files and make sure they are good, if not, then disregard them
                                for (int i = 0; i < filenames.size(); i++) {
                                        if (inputDir != "") {
-                                               string path = hasPath(filenames[i]);
+                                               string path = m->hasPath(filenames[i]);
                                                //if the user has not given a path then, add inputdir. else leave path alone.
                                                if (path == "") {       filenames[i] = inputDir + filenames[i];         }
                                        }
        
                                        ifstream in;
-                                       int ableToOpen = openInputFile(filenames[i], in, "noerror");
+                                       int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
                                
                                        //if you can't open it, try default location
                                        if (ableToOpen == 1) {
                                                if (m->getDefaultPath() != "") { //default path is set
-                                                       string tryPath = m->getDefaultPath() + getSimpleName(filenames[i]);
+                                                       string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
                                                        m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       ifstream in2;
+                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                       in2.close();
+                                                       filenames[i] = tryPath;
+                                               }
+                                       }
+                                       
+                                       //if you still can't open it, try the output directory
+                                       if (ableToOpen == 1) {
+                                               if (m->getOutputDir() != "") { //default path is set
+                                                       string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
+                                                       m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                       ifstream in2;
+                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                       in2.close();
                                                        filenames[i] = tryPath;
                                                }
                                        }
+                                       
                                        in.close();
                                        
                                        if (ableToOpen == 1) { 
@@ -83,25 +156,38 @@ SffInfoCommand::SffInfoCommand(string option)  {
                        if (accnosName == "not found") { accnosName = "";  }
                        else { 
                                hasAccnos = true;
-                               splitAtDash(accnosName, accnosFileNames);
+                               m->splitAtDash(accnosName, accnosFileNames);
                                
                                //go through files and make sure they are good, if not, then disregard them
                                for (int i = 0; i < accnosFileNames.size(); i++) {
                                        if (inputDir != "") {
-                                               string path = hasPath(accnosFileNames[i]);
+                                               string path = m->hasPath(accnosFileNames[i]);
                                                //if the user has not given a path then, add inputdir. else leave path alone.
                                                if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
                                        }
        
                                        ifstream in;
-                                       int ableToOpen = openInputFile(accnosFileNames[i], in, "noerror");
+                                       int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
                                
                                        //if you can't open it, try default location
                                        if (ableToOpen == 1) {
                                                if (m->getDefaultPath() != "") { //default path is set
-                                                       string tryPath = m->getDefaultPath() + getSimpleName(accnosFileNames[i]);
+                                                       string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
                                                        m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       ifstream in2;
+                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                       in2.close();
+                                                       accnosFileNames[i] = tryPath;
+                                               }
+                                       }
+                                       //if you still can't open it, try the output directory
+                                       if (ableToOpen == 1) {
+                                               if (m->getOutputDir() != "") { //default path is set
+                                                       string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
+                                                       m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                       ifstream in2;
+                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                       in2.close();
                                                        accnosFileNames[i] = tryPath;
                                                }
                                        }
@@ -124,19 +210,19 @@ SffInfoCommand::SffInfoCommand(string option)  {
                        }
                        
                        string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
-                       qual = isTrue(temp); 
+                       qual = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
-                       fasta = isTrue(temp); 
+                       fasta = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "F";                             }
-                       flow = isTrue(temp); 
+                       flow = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
-                       trim = isTrue(temp); 
+                       trim = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "sfftxt", false);                           if (temp == "not found"){       temp = "F";                             }
-                       sfftxt = isTrue(temp); 
+                       sfftxt = m->isTrue(temp); 
                }
        }
        catch(exception& e) {
@@ -210,27 +296,27 @@ int SffInfoCommand::execute(){
 int SffInfoCommand::extractSffInfo(string input, string accnos){
        try {
                
-               if (outputDir == "") {  outputDir += hasPath(input); }
+               if (outputDir == "") {  outputDir += m->hasPath(input); }
                
                if (accnos != "")       {  readAccnosFile(accnos);  }
                else                            {       seqNames.clear();               }
 
                ofstream outSfftxt, outFasta, outQual, outFlow;
                string outFastaFileName, outQualFileName;
-               string sfftxtFileName = outputDir + getRootName(getSimpleName(input)) + "sff.txt";
-               string outFlowFileName = outputDir + getRootName(getSimpleName(input)) + "flow";
+               string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
+               string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
                if (trim) {
-                       outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "fasta";
-                       outQualFileName = outputDir + getRootName(getSimpleName(input)) + "qual";
+                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
+                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
                }else{
-                       outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "raw.fasta";
-                       outQualFileName = outputDir + getRootName(getSimpleName(input)) + "raw.qual";
+                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
+                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
                }
                
-               if (sfftxt) { openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName); }
-               if (fasta)      { openOutputFile(outFastaFileName, outFasta);   outputNames.push_back(outFastaFileName); }
-               if (qual)       { openOutputFile(outQualFileName, outQual);             outputNames.push_back(outQualFileName);  }
-               if (flow)       { openOutputFile(outFlowFileName, outFlow);             outputNames.push_back(outFlowFileName);  }
+               if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
+               if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
+               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
+               if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outputTypes["flow"].push_back(outFlowFileName);  }
                
                ifstream in;
                in.open(input.c_str(), ios::binary);
@@ -405,12 +491,12 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){
                        char buffer4 [2];
                        in.read(buffer4, 2);
                        header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
+                       header.clipQualLeft = 5; 
                        
                        //read clip qual right
                        char buffer5 [2];
                        in.read(buffer5, 2);
                        header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
-                       if(header.clipQualRight == 0){  header.clipQualRight = numBases;        }
                        
                        //read clipAdapterLeft
                        char buffer6 [2];
@@ -429,6 +515,9 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){
                        if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
                        delete[] tempBuffer;
                        
+                       //extract info from name
+                       decodeName(header.timestamp, header.region, header.xy, header.name);
+                       
                        /* Pad to 8 chars */
                        unsigned long int spotInFile = in.tellg();
                        unsigned long int spot = (spotInFile + 7)& ~7;
@@ -499,6 +588,43 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i
        }
 }
 //**********************************************************************************************************************
+//decodes the fields packed into a read name and stores them in the output arguments
+//  name[0..5]  base-36 value, unpacked into timestamp as year_month_day_hour_minute_second
+//  name[7..8]  copied unchanged into region
+//  name[9..]   base-36 value, written to xy as x_y (x = value >> 12, y = low 12 bits)
+//names shorter than 9 characters cannot be sliced at those offsets; for them the three
+//outputs are set to empty strings rather than letting substr() throw and end in exit(1)
+//always returns 0
+int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
+       try {
+               
+               //substr(7, 2) and substr(9) throw out_of_range when the name is shorter than 9 characters
+               if (name.length() < 9) {
+                       timestamp = "";
+                       region = "";
+                       xy = "";
+                       return 0;
+               }
+               
+               string time = name.substr(0, 6);
+               unsigned int timeNum = m->fromBase36(time);
+               
+               //peel off seconds, minutes, hours, day and month in turn
+               //the arithmetic stays unsigned so large values cannot overflow a signed int
+               unsigned int rest = timeNum;
+               int sec = rest % 60;            rest /= 60;
+               int minute = rest % 60;         rest /= 60;
+               int hr = rest % 24;             rest /= 24;
+               int day = rest % 32;            rest /= 32;
+               int mon = rest % 13;            rest /= 13;
+               int year = 2000 + rest;
+               
+               timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
+               
+               region = name.substr(7, 2);
+               
+               //the rest of the name holds both coordinates in a single base-36 number
+               string xyNum = name.substr(9);
+               unsigned int myXy = m->fromBase36(xyNum);
+               int x = myXy >> 12;
+               int y = myXy & 4095;
+               
+               xy = toString(x) + "_" + toString(y);
+                       
+               return 0;
+       }
+       catch(exception& e) {
+               //report the exception through m->errorOut, then terminate the process
+               m->errorOut(e, "SffInfoCommand", "decodeName");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
        try {
        
@@ -526,9 +652,9 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) {
        try {
                
                out << ">" << header.name << endl;
-               out << "Run Prefix: " << endl;
-               out << "Region #:  " << endl;
-               out << "XY Location: " << endl << endl;
+               out << "Run Prefix: " << header.timestamp << endl;
+               out << "Region #:  " << header.region << endl;
+               out << "XY Location: " << header.xy << endl << endl;
                
                out << "Run Name:  " << endl;
                out << "Analysis Name:  " << endl;
@@ -562,6 +688,7 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea
                for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
                
                //make the bases you want to clip lowercase and the bases you want to keep upper case
+               if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
                for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
                for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
                for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
@@ -586,18 +713,27 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head
                string seq = read.bases;
                
                if (trim) {
-                       seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft+1));
+                       if(header.clipQualRight < header.clipQualLeft){
+                               seq = "NNNN";
+                       }
+                       else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
+                               seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
+                       }
+                       else {
+                               seq = seq.substr(header.clipQualLeft-1);
+                       }
                }else{
                        //if you wanted the sfftxt then you already converted the bases to the right case
                        if (!sfftxt) {
                                //make the bases you want to clip lowercase and the bases you want to keep upper case
+                               if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
                                for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
                                for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
                                for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
                        }
                }
                
-               out << ">" << header.name << endl;
+               out << ">" << header.name  << " xy=" << header.xy << endl;
                out << seq << endl;
                
                return 0;
@@ -613,10 +749,19 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
        try {
                
                if (trim) {
-                       out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft+1) << endl;
-                       for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t';  }
+                       if(header.clipQualRight < header.clipQualLeft){
+                               out << "0\t0\t0\t0";
+                       }
+                       else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
+                               out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
+                               for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
+                       }
+                       else{
+                               out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
+                               for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
+                       }
                }else{
-                       out << ">" << header.name << " length=" << read.qualScores.size() << endl;
+                       out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
                        for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
                }
                
@@ -634,7 +779,7 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
                
-               out << ">" << header.name << endl;
+               out << ">" << header.name << " xy=" << header.xy << endl;
                for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
                out << endl;
                
@@ -652,11 +797,11 @@ int SffInfoCommand::readAccnosFile(string filename) {
                seqNames.clear();
                
                ifstream in;
-               openInputFile(filename, in);
+               m->openInputFile(filename, in);
                string name;
                
                while(!in.eof()){
-                       in >> name; gobble(in);
+                       in >> name; m->gobble(in);
                                                
                        seqNames.insert(name);