+//**********************************************************************************************************************\r
+int SffInfoCommand::parseSffTxt() {\r
+ try {\r
+ \r
+ ifstream inSFF;\r
+ m->openInputFile(sfftxtFilename, inSFF);\r
+ \r
+ if (outputDir == "") { outputDir += m->hasPath(sfftxtFilename); }\r
+ \r
+ //output file names\r
+ ofstream outFasta, outQual, outFlow;\r
+ string outFastaFileName, outQualFileName;\r
+ string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));\r
+ if (fileRoot.length() > 0) {\r
+ //rip off last .\r
+ fileRoot = fileRoot.substr(0, fileRoot.length()-1);\r
+ fileRoot = m->getRootName(fileRoot);\r
+ }\r
+ \r
+ map<string, string> variables; \r
+ variables["[filename]"] = fileRoot;\r
+ string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
+ string outFlowFileName = getOutputFileName("flow",variables);\r
+ if (!trim) { variables["[tag]"] = "raw"; }\r
+ outFastaFileName = getOutputFileName("fasta",variables);\r
+ outQualFileName = getOutputFileName("qfile",variables);\r
+ \r
+ if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
+ if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); }\r
+ if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); }\r
+ \r
+ //read common header\r
+ string commonHeader = m->getline(inSFF);\r
+ string magicNumber = m->getline(inSFF); \r
+ string version = m->getline(inSFF);\r
+ string indexOffset = m->getline(inSFF);\r
+ string indexLength = m->getline(inSFF);\r
+ int numReads = parseHeaderLineToInt(inSFF);\r
+ string headerLength = m->getline(inSFF);\r
+ string keyLength = m->getline(inSFF);\r
+ int numFlows = parseHeaderLineToInt(inSFF);\r
+ string flowgramCode = m->getline(inSFF);\r
+ string flowChars = m->getline(inSFF);\r
+ string keySequence = m->getline(inSFF);\r
+ m->gobble(inSFF);\r
+ \r
+ string seqName;\r
+ \r
+ if (flow) { outFlow << numFlows << endl; }\r
+ \r
+ for(int i=0;i<numReads;i++){\r
+ \r
+ //sanity check\r
+ if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }\r
+ \r
+ Header header;\r
+ \r
+ //parse read header\r
+ inSFF >> seqName;\r
+ seqName = seqName.substr(1);\r
+ m->gobble(inSFF);\r
+ header.name = seqName;\r
+ \r
+ string runPrefix = parseHeaderLineToString(inSFF); header.timestamp = runPrefix;\r
+ string regionNumber = parseHeaderLineToString(inSFF); header.region = regionNumber;\r
+ string xyLocation = parseHeaderLineToString(inSFF); header.xy = xyLocation;\r
+ m->gobble(inSFF);\r
+ \r
+ string runName = parseHeaderLineToString(inSFF);\r
+ string analysisName = parseHeaderLineToString(inSFF);\r
+ string fullPath = parseHeaderLineToString(inSFF);\r
+ m->gobble(inSFF);\r
+ \r
+ string readHeaderLen = parseHeaderLineToString(inSFF); convert(readHeaderLen, header.headerLength);\r
+ string nameLength = parseHeaderLineToString(inSFF); convert(nameLength, header.nameLength);\r
+ int numBases = parseHeaderLineToInt(inSFF); header.numBases = numBases;\r
+ string clipQualLeft = parseHeaderLineToString(inSFF); convert(clipQualLeft, header.clipQualLeft);\r
+ int clipQualRight = parseHeaderLineToInt(inSFF); header.clipQualRight = clipQualRight;\r
+ string clipAdapLeft = parseHeaderLineToString(inSFF); convert(clipAdapLeft, header.clipAdapterLeft);\r
+ string clipAdapRight = parseHeaderLineToString(inSFF); convert(clipAdapRight, header.clipAdapterRight);\r
+ m->gobble(inSFF);\r
+ \r
+ seqRead read;\r
+ \r
+ //parse read\r
+ vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); read.flowgram = flowVector;\r
+ vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); \r
+ \r
+ //adjust for print\r
+ vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);\r
+ for (int j = 1; j < flowIndices.size(); j++) { flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]); }\r
+ read.flowIndex = flowIndicesAdjusted;\r
+ \r
+ string bases = parseHeaderLineToString(inSFF); read.bases = bases;\r
+ vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases); read.qualScores = qualityScores;\r
+ m->gobble(inSFF);\r
+ \r
+ //if you have provided an accosfile and this seq is not in it, then dont print\r
+ bool print = true;\r
+ if (seqNames.size() != 0) { if (seqNames.count(header.name) == 0) { print = false; } }\r
+ \r
+ //print \r
+ if (print) {\r
+ if (fasta) { printFastaSeqData(outFasta, read, header); }\r
+ if (qual) { printQualSeqData(outQual, read, header); }\r
+ if (flow) { printFlowSeqData(outFlow, read, header); }\r
+ }\r
+ \r
+ //report progress\r
+ if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }\r
+ \r
+ if (m->control_pressed) { break; }\r
+ }\r
+ \r
+ //report progress\r
+ if (!m->control_pressed) { if((numReads) % 10000 != 0){ m->mothurOut(toString(numReads)); m->mothurOutEndLine(); } }\r
+ \r
+ inSFF.close();\r
+ \r
+ if (fasta) { outFasta.close(); }\r
+ if (qual) { outQual.close(); }\r
+ if (flow) { outFlow.close(); }\r
+ \r
+ return 0;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "parseSffTxt");\r
+ exit(1);\r
+ }\r
+}\r
+//**********************************************************************************************************************\r
+\r
+int SffInfoCommand::parseHeaderLineToInt(ifstream& file){\r
+ try {\r
+ int number;\r
+ \r
+ while (!file.eof()) {\r
+ \r
+ char c = file.get(); \r
+ if (c == ':'){\r
+ file >> number;\r
+ break;\r
+ }\r
+ \r
+ }\r
+ m->gobble(file);\r
+ return number;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");\r
+ exit(1);\r
+ }\r
+ \r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+string SffInfoCommand::parseHeaderLineToString(ifstream& file){\r
+ try {\r
+ string text;\r
+ \r
+ while (!file.eof()) {\r
+ char c = file.get(); \r
+ \r
+ if (c == ':'){\r
+ //m->gobble(file);\r
+ //text = m->getline(file); \r
+ file >> text;\r
+ break;\r
+ }\r
+ }\r
+ m->gobble(file);\r
+ \r
+ return text;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");\r
+ exit(1);\r
+ }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){\r
+ try {\r
+ vector<unsigned short> floatVector(length);\r
+ \r
+ while (!file.eof()) {\r
+ char c = file.get(); \r
+ if (c == ':'){\r
+ float temp;\r
+ for(int i=0;i<length;i++){\r
+ file >> temp;\r
+ floatVector[i] = temp * 100;\r
+ }\r
+ break;\r
+ }\r
+ }\r
+ m->gobble(file); \r
+ return floatVector;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");\r
+ exit(1);\r
+ }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){\r
+ try {\r
+ vector<unsigned int> intVector(length);\r
+ \r
+ while (!file.eof()) {\r
+ char c = file.get(); \r
+ if (c == ':'){\r
+ for(int i=0;i<length;i++){\r
+ file >> intVector[i];\r
+ }\r
+ break;\r
+ }\r
+ }\r
+ m->gobble(file); \r
+ return intVector;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");\r
+ exit(1);\r
+ }\r
+}\r
+//***************************************************************************************************************\r
+\r
+bool SffInfoCommand::readOligos(string oligoFile){\r
+ try {\r
+ filehandles.clear();\r
+ numSplitReads.clear();\r
+ filehandlesHeaders.clear();\r
+ \r
+ ifstream inOligos;\r
+ m->openInputFile(oligoFile, inOligos);\r
+ \r
+ string type, oligo, group;\r
+ \r
+ int indexPrimer = 0;\r
+ int indexBarcode = 0;\r
+ \r
+ while(!inOligos.eof()){\r
+ \r
+ inOligos >> type; \r
+ \r
+ if(type[0] == '#'){\r
+ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there\r
+ m->gobble(inOligos);\r
+ }\r
+ else{\r
+ m->gobble(inOligos);\r
+ //make type case insensitive\r
+ for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }\r
+ \r
+ inOligos >> oligo;\r
+ \r
+ for(int i=0;i<oligo.length();i++){\r
+ oligo[i] = toupper(oligo[i]);\r
+ if(oligo[i] == 'U') { oligo[i] = 'T'; }\r
+ }\r
+ \r
+ if(type == "FORWARD"){\r
+ group = "";\r
+ \r
+ // get rest of line in case there is a primer name\r
+ while (!inOligos.eof()) { \r
+ char c = inOligos.get(); \r
+ if (c == 10 || c == 13 || c == -1){ break; }\r
+ else if (c == 32 || c == 9){;} //space or tab\r
+ else { group += c; }\r
+ } \r
+ \r
+ //check for repeat barcodes\r
+ map<string, int>::iterator itPrime = primers.find(oligo);\r
+ if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
+ \r
+ primers[oligo]=indexPrimer; indexPrimer++; \r
+ primerNameVector.push_back(group);\r
+ }else if(type == "REVERSE"){\r
+ //Sequence oligoRC("reverse", oligo);\r
+ //oligoRC.reverseComplement();\r
+ string oligoRC = reverseOligo(oligo);\r
+ revPrimer.push_back(oligoRC);\r
+ }\r
+ else if(type == "BARCODE"){\r
+ inOligos >> group;\r
+ \r
+ //check for repeat barcodes\r
+ map<string, int>::iterator itBar = barcodes.find(oligo);\r
+ if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
+ \r
+ barcodes[oligo]=indexBarcode; indexBarcode++;\r
+ barcodeNameVector.push_back(group);\r
+ }else if(type == "LINKER"){\r
+ linker.push_back(oligo);\r
+ }else if(type == "SPACER"){\r
+ spacer.push_back(oligo);\r
+ }\r
+ else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
+ }\r
+ m->gobble(inOligos);\r
+ } \r
+ inOligos.close();\r
+ \r
+ if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; }\r
+ \r
+ //add in potential combos\r
+ if(barcodeNameVector.size() == 0){\r
+ barcodes[""] = 0;\r
+ barcodeNameVector.push_back(""); \r
+ }\r
+ \r
+ if(primerNameVector.size() == 0){\r
+ primers[""] = 0;\r
+ primerNameVector.push_back(""); \r
+ }\r
+ \r
+ filehandles.resize(barcodeNameVector.size());\r
+ for (int i = 0; i < filehandles.size(); i++) {\r
+ for (int j = 0; j < primerNameVector.size(); j++) {\r
+ ofstream* temp;\r
+ map<string, ofstream*> myMap; myMap[""] = temp;\r
+ filehandles[i].push_back(myMap);\r
+ }\r
+ }\r
+ \r
+ if(split > 1){\r
+ set<string> uniqueNames; //used to cleanup outputFileNames\r
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
+ \r
+ string primerName = primerNameVector[itPrimer->second];\r
+ string barcodeName = barcodeNameVector[itBar->second];\r
+ \r
+ string comboGroupName = "";\r
+ string fastaFileName = "";\r
+ string qualFileName = "";\r
+ string nameFileName = "";\r
+ \r
+ if(primerName == ""){\r
+ comboGroupName = barcodeNameVector[itBar->second];\r
+ }\r
+ else{\r
+ if(barcodeName == ""){\r
+ comboGroupName = primerNameVector[itPrimer->second];\r
+ }\r
+ else{\r
+ comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
+ }\r
+ }\r
+ \r
+ ofstream* temp = new ofstream;\r
+ map<string, string> variables; \r
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+ variables["[group]"] = comboGroupName;\r
+ string thisFilename = getOutputFileName("sff",variables);\r
+ if (uniqueNames.count(thisFilename) == 0) {\r
+ outputNames.push_back(thisFilename);\r
+ outputTypes["sff"].push_back(thisFilename);\r
+ uniqueNames.insert(thisFilename);\r
+ }\r
+ \r
+ map<string, ofstream*> myMap; myMap[thisFilename] = temp;\r
+ m->openOutputFileBinary(thisFilename, *(temp));\r
+ filehandles[itBar->second][itPrimer->second] = myMap;\r
+ map<string, ofstream*>::iterator itOfstream = filehandles[itBar->second][itPrimer->second].find("");\r
+ if (itOfstream != filehandles[itBar->second][itPrimer->second].end()) { filehandles[itBar->second][itPrimer->second].erase(itOfstream); } //remove blank entry so we dont mess with .begin() above. code above assumes only 1 file name in the map\r
+ }\r
+ }\r
+ }\r
+ numFPrimers = primers.size();\r
+ numLinkers = linker.size();\r
+ numSpacers = spacer.size();\r
+ map<string, string> variables; \r
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+ variables["[group]"] = "scrap";\r
+ noMatchFile = getOutputFileName("sff",variables);\r
+ m->mothurRemove(noMatchFile);\r
+ numNoMatch = 0;\r
+ \r
+ bool allBlank = true;\r
+ for (int i = 0; i < barcodeNameVector.size(); i++) {\r
+ if (barcodeNameVector[i] != "") {\r
+ allBlank = false;\r
+ break;\r
+ }\r
+ }\r
+ for (int i = 0; i < primerNameVector.size(); i++) {\r
+ if (primerNameVector[i] != "") {\r
+ allBlank = false;\r
+ break;\r
+ }\r
+ }\r
+ \r
+ filehandlesHeaders.resize(filehandles.size());\r
+ numSplitReads.resize(filehandles.size());\r
+ for (int i = 0; i < filehandles.size(); i++) { \r
+ numSplitReads[i].resize(filehandles[i].size(), 0); \r
+ for (int j = 0; j < filehandles[i].size(); j++) {\r
+ ofstream* temp = new ofstream;\r
+ map<string, ofstream* > myMap;\r
+ string thisHeader = (filehandles[i][j].begin())->first+"headers";\r
+ myMap[thisHeader] = temp;\r
+ m->openOutputFileBinary(thisHeader, *(temp));\r
+ filehandlesHeaders[i].push_back(myMap);\r
+ }\r
+ }\r
+ \r
+ if (allBlank) {\r
+ m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a split the sff file."); m->mothurOutEndLine();\r
+ split = 1;\r
+ return false;\r
+ }\r
+ \r
+ return true;\r
+ \r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "readOligos");\r
+ exit(1);\r
+ }\r
+}\r
+//********************************************************************/\r
+string SffInfoCommand::reverseOligo(string oligo){\r
+ try {\r
+ string reverse = "";\r
+ \r
+ for(int i=oligo.length()-1;i>=0;i--){\r
+ \r
+ if(oligo[i] == 'A') { reverse += 'T'; }\r
+ else if(oligo[i] == 'T'){ reverse += 'A'; }\r
+ else if(oligo[i] == 'U'){ reverse += 'A'; }\r
+ \r
+ else if(oligo[i] == 'G'){ reverse += 'C'; }\r
+ else if(oligo[i] == 'C'){ reverse += 'G'; }\r
+ \r
+ else if(oligo[i] == 'R'){ reverse += 'Y'; }\r
+ else if(oligo[i] == 'Y'){ reverse += 'R'; }\r
+ \r
+ else if(oligo[i] == 'M'){ reverse += 'K'; }\r
+ else if(oligo[i] == 'K'){ reverse += 'M'; }\r
+ \r
+ else if(oligo[i] == 'W'){ reverse += 'W'; }\r
+ else if(oligo[i] == 'S'){ reverse += 'S'; }\r
+ \r
+ else if(oligo[i] == 'B'){ reverse += 'V'; }\r
+ else if(oligo[i] == 'V'){ reverse += 'B'; }\r
+ \r
+ else if(oligo[i] == 'D'){ reverse += 'H'; }\r
+ else if(oligo[i] == 'H'){ reverse += 'D'; }\r
+ \r
+ else { reverse += 'N'; }\r
+ }\r
+ \r
+ \r
+ return reverse;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "reverseOligo");\r
+ exit(1);\r
+ }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+\r
+ \r
+ \r