X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sracommand.cpp;h=d840dc8cfc59a62317600635fca3b601a7de2d5f;hb=HEAD;hp=02e98991c892e922bc7c5a8f8234739e8b42c40a;hpb=f7184748e1519090deecfb6dd9fda118ffba2b53;p=mothur.git diff --git a/sracommand.cpp b/sracommand.cpp index 02e9899..d840dc8 100644 --- a/sracommand.cpp +++ b/sracommand.cpp @@ -18,6 +18,7 @@ vector SRACommand::setParameters(){ CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile); CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq); CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark); //choose only one multiple options CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform); @@ -51,7 +52,7 @@ string SRACommand::getHelpString(){ try { string helpString = ""; helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n"; - helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n"; + helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, checkorient, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n"; helpString += "The sff parameter is used to provide the original sff file.\n"; helpString += "The fastq parameter is used to provide the original fastq file.\n"; helpString += "The project parameter is used to provide your project file.\n"; @@ -63,6 +64,7 @@ string SRACommand::getHelpString(){ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n"; helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n"; helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n"; @@ -134,7 +136,7 @@ SRACommand::SRACommand(string option) { outputTypes["xml"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); + inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { @@ -284,6 +286,8 @@ SRACommand::SRACommand(string option) { m->mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } + + checkorient = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } } @@ -313,6 +317,8 @@ int SRACommand::execute(){ else if (sfffile != "") { parseSffFile(filesBySample); } else if (fastqfile != "") { parseFastqFile(filesBySample); } + for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); } + sanityCheckMiMarksGroups(); //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files. @@ -386,7 +392,7 @@ int SRACommand::execute(){ //////////////////////////////////////////////////////// for (int i = 0; i < Groups.size(); i++) { - string barcodeForThisSample = Group2Barcode[Groups[i]]; + string barcodeForThisSample = Group2Barcode[Groups[i]][0]; if (m->control_pressed) { break; } out << "\t\n"; @@ -430,7 +436,7 @@ int SRACommand::execute(){ for (int i = 0; i < Groups.size(); i++) { vector thisGroupsFiles = filesBySample[Groups[i]]; - string barcodeForThisSample = Group2Barcode[Groups[i]]; + string barcodeForThisSample = Group2Barcode[Groups[i]][0]; for (int j = 0; j < thisGroupsFiles.size(); j++) { string libId = thisGroupsFiles[j] + "." + barcodeForThisSample; @@ -444,10 +450,10 @@ int SRACommand::execute(){ out << "\t\t\t\n"; out << "\t\t\t\tgeneric-data \n"; out << "\t\t\t\n"; - vector thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]], thisBarcodes, '.'); + vector thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]][0], thisBarcodes, '.'); string forwardBarcode = thisBarcodes[0]; string reverseBarcode = thisBarcodes[1]; - vector thisPrimers; m->splitAtChar(Group2Primer[Groups[i]], thisPrimers, '.'); + vector thisPrimers; m->splitAtChar(Group2Primer[Groups[i]][0], thisPrimers, '.'); string forwardPrimer = thisPrimers[0]; string reversePrimer = thisPrimers[1]; //attributes @@ -484,8 +490,8 @@ int SRACommand::execute(){ out << "\t\t\t\n"; //attributes out << "\t\t\t" + mimarks[Groups[i]]["title"] + "\n"; - out << "\t\t\t" + Group2Barcode[Groups[i]] + "\n"; - out << "\t\t\t" + Group2Primer[Groups[i]] + "\n"; + out << "\t\t\t" + Group2Barcode[Groups[i]][0] + "\n"; + out << "\t\t\t" + Group2Primer[Groups[i]][0] + "\n"; out << "\t\t\t" + orientation + "\n"; out << "\t\t\t" + libId + "\n"; out << "\t\t\t" + libStrategy + "\n"; @@ -706,7 +712,7 @@ int SRACommand::readMIMarksFile(){ if (headers[i] == "organism") { if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism organismError = true; - m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to metagenome. You can correct the issue and rerun the command, or NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i]; + m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to acceptable 'metagenome'. NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i]; } Group2Organism[linePieces[0]] = linePieces[i]; } @@ -747,7 +753,7 @@ int SRACommand::readMIMarksFile(){ string organismTypes = ""; for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; } organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1]; - m->mothurOut("[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n"); + m->mothurOut("\n[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n\n\n"); } return 0; @@ -814,12 +820,21 @@ int SRACommand::readFile(map >& files){ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); } + if (inputDir != "") { + string path = m->hasPath(thisFileName1); + if (path == "") { thisFileName1 = inputDir + thisFileName1; } + + path = m->hasPath(thisFileName2); + if (path == "") { thisFileName2 = inputDir + thisFileName2; } + } + //check to make sure both are able to be opened ifstream in2; int openForward = m->openInputFile(thisFileName1, in2, "noerror"); //if you can't open it, try default location if (openForward == 1) { + if (m->getDefaultPath() != "") { //default path is set string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1); m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine(); @@ -939,6 +954,7 @@ int SRACommand::parseSffFile(map >& files){ if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); } if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); } if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); } + if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; } m->mothurOutEndLine(); m->mothurOut("/******************************************/"); m->mothurOutEndLine(); @@ -984,6 +1000,7 @@ int SRACommand::parseFastqFile(map >& files){ if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); } if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); } if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); } + if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; } m->mothurOutEndLine(); m->mothurOut("/******************************************/"); m->mothurOutEndLine(); @@ -1070,245 +1087,113 @@ int SRACommand::checkGroups(map >& files){ //*************************************************************************************************************** int SRACommand::readOligos(){ try { - ifstream inOligos; - m->openInputFile(oligosfile, inOligos); - - string type, oligo, roligo, group; - bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false; - map pairedBarcodes; - map pairedPrimers; - map barcodes; - map primers; - vector linker; - vector spacer, revPrimer; - int indexPrimer = 0; - int indexBarcode = 0; - int indexPairedPrimer = 0; - int indexPairedBarcode = 0; - set uniquePrimers; - set uniqueBarcodes; - - while(!inOligos.eof()){ - - inOligos >> type; - - if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } - - if(type[0] == '#'){ - while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(inOligos); - } - else{ - m->gobble(inOligos); - //make type case insensitive - for(int i=0;i> oligo; - - if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); } - - for(int i=0;i::iterator itPrime = primers.find(oligo); - if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } } - - primers[oligo] = indexPrimer; indexPrimer++; - primerNameVector.push_back(group); - } - else if (type == "PRIMER"){ - m->gobble(inOligos); - - inOligos >> roligo; - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } - - //check for repeat barcodes - string tempPair = oligo+roligo; - if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } - else { uniquePrimers.insert(tempPair); } - - if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } - - pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++; - primerNameVector.push_back(group); - hasPrimer = true; - } - else if(type == "REVERSE"){ - //Sequence oligoRC("reverse", oligo); - //oligoRC.reverseComplement(); - string oligoRC = reverseOligo(oligo); - revPrimer.push_back(oligoRC); - } - else if(type == "BARCODE"){ - inOligos >> group; - - //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs - //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info - - string temp = ""; - while (!inOligos.eof()) { - char c = inOligos.get(); - if (c == 10 || c == 13 || c == -1){ break; } - else if (c == 32 || c == 9){;} //space or tab - else { temp += c; } - } - - //then this is illumina data with 4 columns - if (temp != "") { - hasPairedBarcodes = true; - string reverseBarcode = group; //reverseOligo(group); //reverse barcode - group = temp; - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } - //check for repeat barcodes - string tempPair = oligo+reverseBarcode; - if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); } - else { uniqueBarcodes.insert(tempPair); } - - pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++; - barcodeNameVector.push_back(group); - }else { - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); - } - }else if(type == "LINKER"){ - linker.push_back(oligo); - }else if(type == "SPACER"){ - spacer.push_back(oligo); - } - else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } - } - m->gobble(inOligos); - } - inOligos.close(); - - if (hasPairedBarcodes || hasPrimer) { - pairedOligos = true; - if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; } - } - + Oligos oligos(oligosfile); - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primerNameVector.push_back(""); - } + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { pairedOligos = true; } + else { pairedOligos = false; } set uniqueNames; //used to cleanup outputFileNames if (pairedOligos) { - for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ - for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ + map barcodes = oligos.getPairedBarcodes(); + map primers = oligos.getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - string primerName = primerNameVector[itPrimer->first]; - string barcodeName = barcodeNameVector[itBar->first]; + string primerName = oligos.getPrimerName(itPrimer->first); + string barcodeName = oligos.getBarcodeName(itBar->first); if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; - string fastqFileName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->first]; - } - else{ + comboGroupName = barcodeName; + }else{ if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->first]; + comboGroupName = primerName; } else{ - comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; + comboGroupName = barcodeName + "." + primerName; } } uniqueNames.insert(comboGroupName); - Group2Barcode[comboGroupName] = (itBar->second).forward+"."+(itBar->second).reverse; - Group2Primer[comboGroupName] = (itPrimer->second).forward+"."+(itPrimer->second).reverse; + + map >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName); + if (itGroup2Barcode == Group2Barcode.end()) { + vector tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse); + Group2Barcode[comboGroupName] = tempBarcodes; + }else { + Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse); + } + + itGroup2Barcode = Group2Primer.find(comboGroupName); + if (itGroup2Barcode == Group2Primer.end()) { + vector tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse); + Group2Primer[comboGroupName] = tempPrimers; + }else { + Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse); + } } } } }else { + map barcodes = oligos.getBarcodes() ; + map primers = oligos.getPrimers(); for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; + string primerName = oligos.getPrimerName(itPrimer->second); + string barcodeName = oligos.getBarcodeName(itBar->second); if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; - string fastqFileName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - } - else{ + comboGroupName = barcodeName; + }else{ if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; + comboGroupName = primerName; } else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + comboGroupName = barcodeName + "." + primerName; } } uniqueNames.insert(comboGroupName); - Group2Barcode[comboGroupName] = itBar->first; - Group2Primer[comboGroupName] = itPrimer->first; + + map >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName); + if (itGroup2Barcode == Group2Barcode.end()) { + vector tempBarcodes; tempBarcodes.push_back(itBar->first); + Group2Barcode[comboGroupName] = tempBarcodes; + }else { + Group2Barcode[comboGroupName].push_back(itBar->first); + } + + itGroup2Barcode = Group2Primer.find(comboGroupName); + if (itGroup2Barcode == Group2Primer.end()) { + vector tempPrimers; tempPrimers.push_back(itPrimer->first); + Group2Primer[comboGroupName] = tempPrimers; + }else { + Group2Primer[comboGroupName].push_back(itPrimer->first); + } } } } } - - - if (m->debug) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } - for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); } + if (m->debug) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } return true;