+//***************************************************************************************************************
+//maps group to file
+//For every name in Groups, looks for the files in theseFiles whose name contains "<group>." and,
+//when exactly one file qualifies, appends that file to files[group].
+//  files      - in/out: group name -> file names; existing entries are appended to, never replaced
+//  theseFiles - candidate file names to search
+//Groups with zero matches, or with more than one match, are left untouched.
+//Always returns 0; a caught exception is reported through m->errorOut and the program exits.
+int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
+    try {
+
+        for (size_t i = 0; i < Groups.size(); i++) {
+
+            const string& group = Groups[i];
+
+            //indices into theseFiles (not into Groups) of the files that match this group
+            vector<size_t> matches;
+            for (size_t j = 0; j < theseFiles.size(); j++) {
+                const string& fileName = theseFiles[j];
+
+                string::size_type pos = fileName.find(group);
+                if (pos == string::npos) { continue; }
+
+                //you have a potential match, make sure you dont have a case of partial name
+                //final.soil.sff vs final.soil2.sff both would match soil.
+                string::size_type next = pos + group.length();
+                if ((next < fileName.length()) && (fileName[next] == '.')) {
+                    matches.push_back(j);
+                }
+            }
+
+            //only an unambiguous match is recorded
+            if (matches.size() == 1) {
+                files[group].push_back(theseFiles[matches[0]]); //operator[] creates the entry if it is missing
+            }
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "mapGroupToFile");
+        exit(1);
+    }
+}
+
+//***************************************************************************************************************
+//checks groups against the files map - drops every group that has no entry in files.
+//Groups is replaced by the surviving names, kept in their original relative order.
+//files itself is only read. Always returns 0.
+int SRACommand::checkGroups(map<string, vector<string> >& files){
+    try {
+        vector<string> keptGroups;
+
+        for (size_t index = 0; index < Groups.size(); ++index) {
+            //a group survives only when files has an entry for it
+            if (files.count(Groups[index]) != 0) {
+                keptGroups.push_back(Groups[index]);
+            }
+        }
+
+        Groups = keptGroups;
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkGroups");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+//builds the group name for one barcode/primer pairing.
+//returns "" when the pairing must be skipped: either name is "ignore", or both names are blank.
+//otherwise returns the single non-blank name, or "<barcodeName>.<primerName>" when both are present.
+static string sraComboGroupName(const string& barcodeName, const string& primerName){
+    if ((primerName == "ignore") || (barcodeName == "ignore")) { return ""; }
+    if (primerName == "") { return barcodeName; } //also yields "" when both names are blank
+    if (barcodeName == "") { return primerName; }
+    return barcodeName + "." + primerName;
+}
+//***************************************************************************************************************
+//reads oligosfile and, for every barcode x primer pairing that is not skipped, records the barcode
+//in Group2Barcode[group] and the primer in Group2Primer[group].
+//With paired barcodes each recorded oligo is "<forward>.<reverse>"; otherwise it is the oligo string itself.
+//Sets pairedOligos as a side effect.
+//Returns 0 (false) if m->control_pressed is set after constructing the Oligos object, 1 (true) otherwise.
+int SRACommand::readOligos(){
+    try {
+        Oligos oligos(oligosfile);
+
+        if (m->control_pressed) { return false; } //error in reading oligos
+
+        pairedOligos = oligos.hasPairedBarcodes() ? true : false;
+
+        set<string> uniqueNames; //group names seen, only used for the debug listing below
+        if (pairedOligos) {
+            map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+            map<int, oligosPair> primers = oligos.getPairedPrimers();
+            for (map<int, oligosPair>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
+                for (map<int, oligosPair>::iterator itPrimer = primers.begin(); itPrimer != primers.end(); itPrimer++) {
+
+                    string primerName = oligos.getPrimerName(itPrimer->first);
+                    string barcodeName = oligos.getBarcodeName(itBar->first);
+
+                    string comboGroupName = sraComboGroupName(barcodeName, primerName);
+                    if (comboGroupName == "") { continue; } //ignored or unnamed pairing
+
+                    uniqueNames.insert(comboGroupName);
+
+                    //operator[] creates the group's vector the first time the group is seen
+                    Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
+                    Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
+                }
+            }
+        }else {
+            map<string, int> barcodes = oligos.getBarcodes();
+            map<string, int> primers = oligos.getPrimers();
+            for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
+                for (map<string, int>::iterator itPrimer = primers.begin(); itPrimer != primers.end(); itPrimer++) {
+
+                    string primerName = oligos.getPrimerName(itPrimer->second);
+                    string barcodeName = oligos.getBarcodeName(itBar->second);
+
+                    string comboGroupName = sraComboGroupName(barcodeName, primerName);
+                    if (comboGroupName == "") { continue; } //ignored or unnamed pairing
+
+                    uniqueNames.insert(comboGroupName);
+
+                    //operator[] creates the group's vector the first time the group is seen
+                    Group2Barcode[comboGroupName].push_back(itBar->first);
+                    Group2Primer[comboGroupName].push_back(itPrimer->first);
+                }
+            }
+        }
+
+        if (m->debug) {
+            int count = 0;
+            for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {
+                m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n");
+                count++;
+            }
+        }
+
+        return true;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "readOligos");
+        exit(1);
+    }
+}
+//********************************************************************/
+//returns the reverse complement of oligo.
+//The input is walked from its last character to its first and each character is replaced by its
+//complement: A<->T (U is treated as T), G<->C, R<->Y, M<->K, B<->V, D<->H, while W and S map to themselves.
+//Every other character - including lower case letters - becomes 'N'.
+string SRACommand::reverseOligo(string oligo){
+    try {
+        string complemented = "";
+
+        for (string::reverse_iterator base = oligo.rbegin(); base != oligo.rend(); ++base) {
+            char paired;
+            switch (*base) {
+                case 'A': paired = 'T'; break;
+                case 'T':
+                case 'U': paired = 'A'; break;
+
+                case 'G': paired = 'C'; break;
+                case 'C': paired = 'G'; break;
+
+                case 'R': paired = 'Y'; break;
+                case 'Y': paired = 'R'; break;
+
+                case 'M': paired = 'K'; break;
+                case 'K': paired = 'M'; break;
+
+                case 'W': paired = 'W'; break;
+                case 'S': paired = 'S'; break;
+
+                case 'B': paired = 'V'; break;
+                case 'V': paired = 'B'; break;
+
+                case 'D': paired = 'H'; break;
+                case 'H': paired = 'D'; break;
+
+                default:  paired = 'N'; break;
+            }
+            complemented += paired;
+        }
+
+        return complemented;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "reverseOligo");
+        exit(1);
+    }
+}
+//********************************************************************/
+//valid platforms: _LS454, ILLUMINA, ION_TORRENT, PACBIO_SMRT ("454" is accepted as an alias for _LS454)
+//Validates the platform name without regard to case.
+//  platform - in/out: upper-cased in place; "454" is rewritten to "_LS454".
+//             On failure it is left upper-cased.
+//Returns true when the platform is valid. Otherwise prints an error, sets abort and returns false.
+bool SRACommand::checkCasesPlatforms(string& platform){
+    try {
+        const string original = platform; //kept so the error message can show what the user typed
+
+        //remove users possible case errors
+        //the cast keeps toupper well defined for characters with a negative char value
+        for (size_t i = 0; i < platform.size(); i++) {
+            platform[i] = toupper(static_cast<unsigned char>(platform[i]));
+        }
+
+        if (platform == "454") { platform = "_LS454"; }
+
+        bool isOkay = (platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT");
+
+        if (!isOkay) {
+            m->mothurOut("[ERROR]: " + original + " is not a valid platform option. Valid platform options are _LS454, ILLUMINA, ION_TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
+        }
+
+        return isOkay;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkCasesPlatforms");
+        exit(1);
+    }
+}
+//********************************************************************/
+//instrument models accepted for each platform (every platform also accepts unspecified):
+//  _LS454      - 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior
+//  ILLUMINA    - Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx,
+//                Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq
+//  ION_TORRENT - Ion_Torrent_PGM
+//  PACBIO_SMRT - PacBio_RS
+//Validates instrumentModel, without regard to case, against the models allowed for the current platform.
+//  instrumentModel - in/out: upper-cased in place, then rewritten to the spelling listed above on a match.
+//                    On failure it is left upper-cased.
+//Returns true when the model is valid. Otherwise prints an error, sets abort and returns false.
+//NOTE(review): when platform is none of the four names above, no validation is performed and true is
+//returned with instrumentModel left upper-cased - presumably the platform was already rejected by
+//checkCasesPlatforms; confirm against the caller.
+bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
+    try {
+        //upper-cased spelling the input is compared with, paired with the spelling stored on a match
+        struct ModelSpelling { const char* upper; const char* canonical; };
+
+        static const ModelSpelling ls454Models[] = {
+            { "454_GS",              "454_GS" },
+            { "454_GS_20",           "454_GS_20" },
+            { "454_GS_FLX",          "454_GS_FLX" },
+            { "454_GS_FLX_TITANIUM", "454_GS_FLX_Titanium" },
+            { "454_GS_JUNIOR",       "454_GS_Junior" },
+            { "UNSPECIFIED",         "unspecified" }
+        };
+        static const ModelSpelling illuminaModels[] = {
+            { "ILLUMINA_GENOME_ANALYZER",     "Illumina_Genome_Analyzer" },
+            { "ILLUMINA_GENOME_ANALYZER_II",  "Illumina_Genome_Analyzer_II" },
+            { "ILLUMINA_GENOME_ANALYZER_IIX", "Illumina_Genome_Analyzer_IIx" },
+            { "ILLUMINA_HISEQ_2000",          "Illumina_HiSeq_2000" },
+            { "ILLUMINA_HISEQ_1000",          "Illumina_HiSeq_1000" },
+            { "ILLUMINA_MISEQ",               "Illumina_MiSeq" },
+            { "UNSPECIFIED",                  "unspecified" }
+        };
+        static const ModelSpelling ionTorrentModels[] = {
+            { "ION_TORRENT_PGM", "Ion_Torrent_PGM" },
+            { "UNSPECIFIED",     "unspecified" }
+        };
+        static const ModelSpelling pacBioModels[] = {
+            { "PACBIO_RS",   "PacBio_RS" },
+            { "UNSPECIFIED", "unspecified" }
+        };
+
+        const string original = instrumentModel; //kept so the error message can show what the user typed
+
+        //remove users possible case errors
+        //the cast keeps toupper well defined for characters with a negative char value
+        for (size_t i = 0; i < instrumentModel.size(); i++) {
+            instrumentModel[i] = toupper(static_cast<unsigned char>(instrumentModel[i]));
+        }
+
+        //pick the table and the option list quoted in the error message for the current platform
+        const ModelSpelling* known = 0;
+        size_t numKnown = 0;
+        string validOptions = "";
+        if (platform == "_LS454") {
+            known = ls454Models; numKnown = sizeof(ls454Models) / sizeof(ls454Models[0]);
+            validOptions = "454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified";
+        }else if (platform == "ILLUMINA") {
+            known = illuminaModels; numKnown = sizeof(illuminaModels) / sizeof(illuminaModels[0]);
+            validOptions = "Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified";
+        }else if (platform == "ION_TORRENT") {
+            known = ionTorrentModels; numKnown = sizeof(ionTorrentModels) / sizeof(ionTorrentModels[0]);
+            validOptions = "Ion_Torrent_PGM or unspecified";
+        }else if (platform == "PACBIO_SMRT") {
+            known = pacBioModels; numKnown = sizeof(pacBioModels) / sizeof(pacBioModels[0]);
+            validOptions = "PacBio_RS or unspecified";
+        }else {
+            return true; //unrecognized platform: nothing to validate against
+        }
+
+        bool isOkay = false;
+        for (size_t i = 0; i < numKnown; i++) {
+            if (instrumentModel == known[i].upper) {
+                instrumentModel = known[i].canonical;
+                isOkay = true;
+                break;
+            }
+        }
+
+        if (!isOkay) {
+            m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are " + validOptions + "."); m->mothurOutEndLine(); abort = true;
+        }
+
+        return isOkay;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
+        exit(1);
+    }
+}