X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=sffinfocommand.cpp;h=c80ac2e51859331f17f02bcdf5abdd3f89bec6f3;hp=c50255aeb2ae9c202074b78d89d460651cefdc43;hb=615301e57c25e241356a9c2380648d117709458d;hpb=cc19310422f125d6628980bd1148e1e816792382 diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index c50255a..c80ac2e 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -16,21 +16,21 @@ //********************************************************************************************************************** vector SffInfoCommand::setParameters(){ try { - CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff); - CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); - CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos); - CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt); - CommandParameter pflow("flow", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pflow); - CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim); - CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta); - CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); - CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); - CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs); - CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs); - CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff); + CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos); + CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); + CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt); + CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow); + CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim); + CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta); + CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile); + CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs); + CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs); + CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); + CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); + CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -70,32 +70,26 @@ string SffInfoCommand::getHelpString(){ exit(1); } } + //********************************************************************************************************************** -string SffInfoCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string SffInfoCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "fasta") { outputFileName = "fasta"; } - else if (type == "flow") { outputFileName = "flow"; } - else if (type == "sfftxt") { outputFileName = "sff.txt"; } - else if (type == "sff") { outputFileName = "sff"; } - else if (type == "qfile") { outputFileName = "qual"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "fasta") { pattern = "[filename],fasta-[filename],[tag],fasta"; } + else if (type == "flow") { pattern = "[filename],flow"; } + else if (type == "sfftxt") { pattern = "[filename],sff.txt"; } + else if (type == "sff") { pattern = "[filename],[group],sff"; } + else if (type == "qfile") { pattern = "[filename],qual-[filename],[tag],qual"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getOutputPattern"); + exit(1); + } } - - //********************************************************************************************************************** SffInfoCommand::SffInfoCommand(){ try { @@ -504,16 +498,14 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ string rootName = outputDir + m->getRootName(m->getSimpleName(input)); if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; } - string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("sfftxt"); - string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("flow"); - if (trim) { - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("fasta"); - outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("qfile"); - }else{ - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("fasta"); - outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("qfile"); - } - + map variables; + variables["[filename]"] = rootName; + string sfftxtFileName = getOutputFileName("sfftxt",variables); + string outFlowFileName = getOutputFileName("flow",variables); + if (!trim) { variables["[tag]"] = "raw"; } + outFastaFileName = getOutputFileName("fasta",variables); + outQualFileName = getOutputFileName("qfile",variables); + if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); } if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); } @@ -654,7 +646,7 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ char buffer3 [4]; in.read(buffer3, 4); header.indexLength = be_int4(*(unsigned int *)(&buffer3)); - + //read num reads char buffer4 [4]; in.read(buffer4, 4); @@ -749,9 +741,19 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,8); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { + unsigned long long offset = 0; + char* thisbuffer = new char[8]; + thisbuffer[0] = (offset >> 56) & 0xFF; + thisbuffer[1] = (offset >> 48) & 0xFF; + thisbuffer[2] = (offset >> 40) & 0xFF; + thisbuffer[3] = (offset >> 32) & 0xFF; + thisbuffer[4] = (offset >> 24) & 0xFF; + thisbuffer[5] = (offset >> 16) & 0xFF; + thisbuffer[6] = (offset >> 8) & 0xFF; + thisbuffer[7] = offset & 0xFF; ofstream out; m->openOutputFileAppend(filehandlesHeaders[i][j], out); - out.write(mybuffer, in.gcount()); + out.write(thisbuffer, 8); out.close(); } } @@ -765,7 +767,13 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; m->openOutputFileAppend(filehandlesHeaders[i][j], out); - out.write(mybuffer, in.gcount()); + int offset = 0; + char* thisbuffer = new char[4]; + thisbuffer[0] = (offset >> 24) & 0xFF; + thisbuffer[1] = (offset >> 16) & 0xFF; + thisbuffer[2] = (offset >> 8) & 0xFF; + thisbuffer[3] = offset & 0xFF; + out.write(thisbuffer, 4); out.close(); } } @@ -879,7 +887,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, spot-spotInFile); out.close(); } @@ -928,7 +936,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H char buffer5 [2]; in.read(buffer5, 2); header.clipQualRight = be_int2(*(unsigned short *)(&buffer5)); - + //read clipAdapterLeft char buffer6 [2]; in.read(buffer6, 2); @@ -997,7 +1005,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H char * mybuffer; mybuffer = new char [spot-startSpotInFile]; ifstream in2; - m->openInputFile(currentFileName, in2); + in2.open(currentFileName.c_str(), ios::binary); in2.seekg(startSpotInFile); in2.read(mybuffer,spot-startSpotInFile); in2.close(); @@ -1007,7 +1015,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H if(trashCodeLength == 0){ ofstream out; - m->openOutputFileAppend(filehandles[barcodeIndex][primerIndex], out); + m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out); out.write(mybuffer, in2.gcount()); out.close(); delete[] mybuffer; @@ -1015,7 +1023,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H } else{ ofstream out; - m->openOutputFileAppend(noMatchFile, out); + m->openOutputFileBinaryAppend(noMatchFile, out); out.write(mybuffer, in2.gcount()); out.close(); delete[] mybuffer; @@ -1047,7 +1055,11 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr if (trim) { if(header.clipQualRight < header.clipQualLeft){ - seq = "NNNN"; + if (header.clipQualRight == 0) { //don't trim right + seq = seq.substr(header.clipQualLeft-1); + }else { + seq = "NNNN"; + } } else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft)); @@ -1058,11 +1070,12 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr }else{ //if you wanted the sfftxt then you already converted the bases to the right case if (!sfftxt) { + int endValue = header.clipQualRight; //make the bases you want to clip lowercase and the bases you want to keep upper case - if(header.clipQualRight == 0){ header.clipQualRight = seq.length(); } + if(endValue == 0){ endValue = seq.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } - for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { seq[i] = toupper(seq[i]); } - for (int i = (header.clipQualRight-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } + for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { seq[i] = toupper(seq[i]); } + for (int i = (endValue-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } } } @@ -1243,10 +1256,11 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; } //make the bases you want to clip lowercase and the bases you want to keep upper case - if(header.clipQualRight == 0){ header.clipQualRight = read.bases.length(); } + int endValue = header.clipQualRight; + if(endValue == 0){ endValue = read.bases.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); } - for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { read.bases[i] = toupper(read.bases[i]); } - for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } + for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { read.bases[i] = toupper(read.bases[i]); } + for (int i = (endValue-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } out << endl << "Bases: " << read.bases << endl << "Quality Scores: "; for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } @@ -1268,7 +1282,11 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head if (trim) { if(header.clipQualRight < header.clipQualLeft){ - seq = "NNNN"; + if (header.clipQualRight == 0) { //don't trim right + seq = seq.substr(header.clipQualLeft-1); + }else { + seq = "NNNN"; + } } else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft)); @@ -1279,11 +1297,12 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head }else{ //if you wanted the sfftxt then you already converted the bases to the right case if (!sfftxt) { + int endValue = header.clipQualRight; //make the bases you want to clip lowercase and the bases you want to keep upper case - if(header.clipQualRight == 0){ header.clipQualRight = seq.length(); } + if(endValue == 0){ endValue = seq.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } - for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { seq[i] = toupper(seq[i]); } - for (int i = (header.clipQualRight-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } + for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { seq[i] = toupper(seq[i]); } + for (int i = (endValue-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } } } @@ -1304,8 +1323,13 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade if (trim) { if(header.clipQualRight < header.clipQualLeft){ - out << ">" << header.name << " xy=" << header.xy << endl; - out << "0\t0\t0\t0"; + if (header.clipQualRight == 0) { //don't trim right + out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + }else { + out << ">" << header.name << " xy=" << header.xy << endl; + out << "0\t0\t0\t0"; + } } else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; @@ -1333,15 +1357,21 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade //********************************************************************************************************************** int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { try { - if(header.clipQualRight > header.clipQualLeft){ - - int rightIndex = 0; - for (int i = 0; i < header.clipQualRight; i++) { rightIndex += read.flowIndex[i]; } - - out << header.name << ' ' << rightIndex; - for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100); } - out << endl; - } + + int endValue = header.clipQualRight; + if (header.clipQualRight == 0) { + endValue = read.flowIndex.size(); + if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); } + } + if(endValue > header.clipQualLeft){ + + int rightIndex = 0; + for (int i = 0; i < endValue; i++) { rightIndex += read.flowIndex[i]; } + + out << header.name << ' ' << rightIndex; + for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100); } + out << endl; + } return 0; @@ -1396,14 +1426,13 @@ int SffInfoCommand::parseSffTxt() { fileRoot = m->getRootName(fileRoot); } - string outFlowFileName = outputDir + fileRoot + getOutputFileNameTag("flow"); - if (trim) { - outFastaFileName = outputDir + fileRoot + getOutputFileNameTag("fasta"); - outQualFileName = outputDir + fileRoot + getOutputFileNameTag("qfile"); - }else{ - outFastaFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("fasta"); - outQualFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("qfile"); - } + map variables; + variables["[filename]"] = fileRoot; + string sfftxtFileName = getOutputFileName("sfftxt",variables); + string outFlowFileName = getOutputFileName("flow",variables); + if (!trim) { variables["[tag]"] = "raw"; } + outFastaFileName = getOutputFileName("fasta",variables); + outQualFileName = getOutputFileName("qfile",variables); if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); } @@ -1650,7 +1679,7 @@ bool SffInfoCommand::readOligos(string oligoFile){ // get rest of line in case there is a primer name while (!inOligos.eof()) { char c = inOligos.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -1731,7 +1760,10 @@ bool SffInfoCommand::readOligos(string oligoFile){ } ofstream temp; - string thisFilename = outputDir + m->getRootName(m->getSimpleName(currentFileName)) + comboGroupName + "." + getOutputFileNameTag("sff"); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); + variables["[group]"] = comboGroupName; + string thisFilename = getOutputFileName("sff",variables); if (uniqueNames.count(thisFilename) == 0) { outputNames.push_back(thisFilename); outputTypes["sff"].push_back(thisFilename); @@ -1746,7 +1778,10 @@ bool SffInfoCommand::readOligos(string oligoFile){ numFPrimers = primers.size(); numLinkers = linker.size(); numSpacers = spacer.size(); - noMatchFile = outputDir + m->getRootName(m->getSimpleName(currentFileName)) + "scrap." + getOutputFileNameTag("sff"); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); + variables["[group]"] = "scrap"; + noMatchFile = getOutputFileName("sff",variables); m->mothurRemove(noMatchFile); bool allBlank = true;