X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=createdatabasecommand.cpp;h=a3531661d47c5cc640b822d5ba176a0f50a61844;hp=57d5264b8b151d4175b6754ab2ee75732e1e597e;hb=499f4ac6e321f9f03d4c3aa25c3b6880892c8b83;hpb=ee8403d4eb5760187d62b42a9cf4272de8fc0ec4 diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp index 57d5264..a353166 100644 --- a/createdatabasecommand.cpp +++ b/createdatabasecommand.cpp @@ -12,14 +12,16 @@ //********************************************************************************************************************** vector CreateDatabaseCommand::setParameters(){ try { - CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname); - CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pcontaxonomy); - CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none","database",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pcontaxonomy); + CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist); + CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -34,17 +36,18 @@ vector CreateDatabaseCommand::setParameters(){ string CreateDatabaseCommand::getHelpString(){ try { string helpString = ""; - helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n"; - helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n"; + helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, or count file and creates a database file.\n"; + helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group, count and label. List, repfasta, repnames or count, and contaxonomy are required.\n"; helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; - helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n"; + helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n"; + helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n"; helpString += "The group file is optional and will just give you the abundance breakdown by group.\n"; helpString += "The label parameter allows you to specify a label to be used from your listfile.\n"; helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n"; helpString += "The create.database command should be in the following format: \n"; helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n"; - helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n"; + helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, repname=final.an.0.03.rep.names, list=final.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n"; helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n"; return helpString; } @@ -54,6 +57,21 @@ string CreateDatabaseCommand::getHelpString(){ } } //********************************************************************************************************************** +string CreateDatabaseCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "database") { pattern = "[filename],database"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "CreateDatabaseCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** CreateDatabaseCommand::CreateDatabaseCommand(){ try { abort = true; calledHelp = true; @@ -138,6 +156,22 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["group"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } } @@ -146,14 +180,33 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option) { //check for required parameters listfile = validParameter.validFile(parameters, "list", true); - if (listfile == "not found") { - //if there is a current list file, use it + if (listfile == "not found") { listfile = ""; } + else if (listfile == "not open") { listfile = ""; abort = true; } + else { m->setListFile(listfile); } + + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not found") { sharedfile = ""; } + else if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else { m->setSharedFile(sharedfile); } + + if ((sharedfile == "") && (listfile == "")) { + //is there are current file available for either of these? + //give priority to list, then shared listfile = m->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; } + else { + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine(); + abort = true; + } + } } - else if (listfile == "not open") { abort = true; } - else { m->setListFile(listfile); } + else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true; } + + if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } } + else { if (outputDir == "") { outputDir = m->hasPath(listfile); } } contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true); if (contaxonomyfile == "not found") { //if there is a current list file, use it @@ -168,10 +221,23 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option) { else if (repfastafile == "not open") { repfastafile = ""; abort = true; } repnamesfile = validParameter.validFile(parameters, "repname", true); - if (repnamesfile == "not found") { //if there is a current list file, use it - repnamesfile = ""; m->mothurOut("The repnames parameter is required, aborting."); m->mothurOutEndLine(); abort = true; - } + if (repnamesfile == "not found") { repnamesfile = ""; } else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not found") { countfile = ""; } + else if (countfile == "not open") { countfile = ""; abort = true; } + + if ((countfile == "") && (repnamesfile == "")) { + //if there is a current name file, use it, else look for current count file + string repnamesfile = m->getNameFile(); + if (repnamesfile != "") { m->mothurOut("Using " + repnamesfile + " as input file for the repname parameter."); m->mothurOutEndLine(); } + else { + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("[ERROR]: You must provide a count or repname file."); m->mothurOutEndLine(); abort = true; } + } + } groupfile = validParameter.validFile(parameters, "group", true); if (groupfile == "not open") { groupfile = ""; abort = true; } @@ -198,7 +264,8 @@ int CreateDatabaseCommand::execute(){ //taxonomies holds the taxonomy info for each Otu //classifyOtuSizes holds the size info of each Otu to help with error checking vector taxonomies; - vector classifyOtuSizes = readTax(taxonomies); + vector otuLabels; + vector classifyOtuSizes = readTax(taxonomies, otuLabels); if (m->control_pressed) { return 0; } @@ -209,7 +276,32 @@ int CreateDatabaseCommand::execute(){ //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map. map repNames; - int numUniqueNamesFile = m->readNames(repnamesfile, repNames); + map nameMap; + int numUniqueNamesFile = 0; + CountTable ct; + if (countfile == "") { + numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1); + //the repnames file does not have the same order as the list file bins so we need to sort and reassemble for the search below + map tempRepNames; + for (map::iterator it = repNames.begin(); it != repNames.end();) { + string bin = it->first; + vector temp; + m->splitAtChar(bin, temp, ','); + sort(temp.begin(), temp.end()); + bin = ""; + for (int i = 0; i < temp.size()-1; i++) { + bin += temp[i] + ','; + } + bin += temp[temp.size()-1]; + tempRepNames[bin] = it->second; + repNames.erase(it++); + } + repNames = tempRepNames; + }else { + ct.readTable(countfile, true, false); + numUniqueNamesFile = ct.getNumUniqueSeqs(); + nameMap = ct.getNameMap(); + } //are there the same number of otus in the fasta and name files if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; } @@ -230,86 +322,174 @@ int CreateDatabaseCommand::execute(){ if (m->control_pressed) { return 0; } - //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile - ListVector* list = getList(); - - if (m->control_pressed) { delete list; return 0; } - GroupMap* groupmap = NULL; - if (groupfile != "") { - groupmap = new GroupMap(groupfile); - groupmap->readMap(); - } - - if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; } - - if (outputDir == "") { outputDir += m->hasPath(listfile); } - string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + "database"; + map variables; + if (listfile != "") { variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile)); } + else { variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); } + string outputFileName = getOutputFileName("database", variables); outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName); ofstream out; m->openOutputFile(outputFileName, out); string header = "OTUNumber\tAbundance\t"; - if (groupfile != "") { - header = "OTUNumber\t"; - for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; } - } - header += "repSeqName\trepSeq\tOTUConTaxonomy"; - out << header << endl; + - for (int i = 0; i < list->getNumBins(); i++) { - - if (m->control_pressed) { break; } + if (listfile != "") { + //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile + ListVector* list = getList(); - out << (i+1) << '\t'; + if (otuLabels.size() != list->getNumBins()) { + m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true; } - vector binNames; - string bin = list->get(i); + if (m->control_pressed) { delete list; return 0; } - map::iterator it = repNames.find(bin); - if (it == repNames.end()) { - m->mothurOut("[ERROR: OTU " + toString(i+1) + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + GroupMap* groupmap = NULL; + if (groupfile != "") { + groupmap = new GroupMap(groupfile); + groupmap->readMap(); } - m->splitAtComma(bin, binNames); + if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; } - //sanity check - if (binNames.size() != classifyOtuSizes[i]) { - m->mothurOut("[ERROR: OTU " + toString(i+1) + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + if (groupfile != "") { + header = "OTUNumber\t"; + for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; } + }else if (countfile != "") { + if (ct.hasGroupInfo()) { + header = "OTUNumber\t"; + for (int i = 0; i < ct.getNamesOfGroups().size(); i++) { header += (ct.getNamesOfGroups())[i] + '\t'; } + } } + header += "repSeqName\trepSeq\tOTUConTaxonomy"; + out << header << endl; - //output abundances - if (groupfile != "") { - string groupAbunds = ""; - map counts; - //initialize counts to 0 - for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; } + vector binLabels = list->getLabels(); + for (int i = 0; i < list->getNumBins(); i++) { + + int index = findIndex(otuLabels, binLabels[i]); + if (index == -1) { m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } + + if (m->control_pressed) { break; } + + out << otuLabels[index] << '\t'; + + vector binNames; + string bin = list->get(i); + m->splitAtComma(bin, binNames); + + string seqRepName = ""; + int numSeqsRep = 0; - //find abundances by group - bool error = false; - for (int j = 0; j < binNames.size(); j++) { - string group = groupmap->getGroup(binNames[j]); - if (group == "not found") { - m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n"); - error = true; - }else { counts[group]++; } + if (countfile == "") { + sort(binNames.begin(), binNames.end()); + bin = ""; + for (int j = 0; j < binNames.size()-1; j++) { + bin += binNames[j] + ','; + } + bin += binNames[binNames.size()-1]; + map::iterator it = repNames.find(bin); + + if (it == repNames.end()) { + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + }else { seqRepName = it->second; numSeqsRep = binNames.size(); } + + //sanity check + if (binNames.size() != classifyOtuSizes[index]) { + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + } + }else { + //find rep sequence in bin + for (int j = 0; j < binNames.size(); j++) { + map::iterator itNameMap = nameMap.find(binNames[j]); //if you are in the counttable you must be the rep. because get.oturep with a countfile only includes the rep sequences in the rep.count file. + if (itNameMap != nameMap.end()) { + seqRepName = itNameMap->first; + numSeqsRep = itNameMap->second; + j += binNames.size(); + } + } + + if (seqRepName == "") { + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + } + + if (numSeqsRep != classifyOtuSizes[i]) { + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + } } - //output counts - for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t'; } + //output abundances + if (groupfile != "") { + string groupAbunds = ""; + map counts; + //initialize counts to 0 + for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; } + + //find abundances by group + bool error = false; + for (int j = 0; j < binNames.size(); j++) { + string group = groupmap->getGroup(binNames[j]); + if (group == "not found") { + m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n"); + error = true; + }else { counts[group]++; } + } + + //output counts + for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t'; } + + if (error) { m->control_pressed = true; } + }else if (countfile != "") { + if (ct.hasGroupInfo()) { + vector groupCounts = ct.getGroupCounts(seqRepName); + for (int j = 0; j < groupCounts.size(); j++) { out << groupCounts[j] << '\t'; } + }else { out << numSeqsRep << '\t'; } + }else { out << numSeqsRep << '\t'; } - if (error) { m->control_pressed = true; } - }else { out << binNames.size() << '\t'; } + //output repSeq + out << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; + } + + + delete list; + if (groupfile != "") { delete groupmap; } + + }else { + vector lookup = getShared(); + + header = "OTUNumber\t"; + for (int i = 0; i < lookup.size(); i++) { header += lookup[i]->getGroup() + '\t'; } + header += "repSeqName\trepSeq\tOTUConTaxonomy"; + out << header << endl; - //output repSeq - out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl; + for (int h = 0; h < lookup[0]->getNumBins(); h++) { + + if (m->control_pressed) { break; } + + int index = findIndex(otuLabels, m->currentSharedBinLabels[h]); + if (index == -1) { m->mothurOut("[ERROR]: " + m->currentSharedBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } + + if (m->control_pressed) { break; } + + out << otuLabels[index] << '\t'; + + int totalAbund = 0; + for (int i = 0; i < lookup.size(); i++) { + int abund = lookup[i]->getAbundance(h); + totalAbund += abund; + out << abund << '\t'; + } + + //sanity check + if (totalAbund != classifyOtuSizes[index]) { + m->mothurOut("[WARNING]: OTU " + m->currentSharedBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break; + } + + //output repSeq + out << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; + } } out.close(); - - delete list; - if (groupfile != "") { delete groupmap; } - if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; } m->mothurOutEndLine(); @@ -326,7 +506,21 @@ int CreateDatabaseCommand::execute(){ } } //********************************************************************************************************************** -vector CreateDatabaseCommand::readTax(vector& taxonomies){ +int CreateDatabaseCommand::findIndex(vector& otuLabels, string label){ + try { + int index = -1; + for (int i = 0; i < otuLabels.size(); i++) { + if (m->isLabelEquivalent(otuLabels[i],label)) { index = i; break; } + } + return index; + } + catch(exception& e) { + m->errorOut(e, "CreateDatabaseCommand", "findIndex"); + exit(1); + } +} +//********************************************************************************************************************** +vector CreateDatabaseCommand::readTax(vector& taxonomies, vector& otuLabels){ try { vector sizes; @@ -348,6 +542,7 @@ vector CreateDatabaseCommand::readTax(vector& taxonomies){ sizes.push_back(size); taxonomies.push_back(tax); + otuLabels.push_back(otu); } in.close(); @@ -367,6 +562,7 @@ vector CreateDatabaseCommand::readFasta(vector& seqs){ ifstream in; m->openInputFile(repfastafile, in); + set sanity; while (!in.eof()) { if (m->control_pressed) { break; } @@ -377,11 +573,20 @@ vector CreateDatabaseCommand::readFasta(vector& seqs){ //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file vector info; m->splitAtChar(binInfo, info, '|'); - if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break;} + //if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break;} int size = 0; m->mothurConvert(info[1], size); + int binNumber = 0; + string temp = ""; + for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } } + m->mothurConvert(temp, binNumber); + set::iterator it = sanity.find(binNumber); + if (it != sanity.end()) { + m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break; + }else { sanity.insert(binNumber); } + sizes.push_back(size); seqs.push_back(seq); } @@ -472,5 +677,81 @@ ListVector* CreateDatabaseCommand::getList(){ } } //********************************************************************************************************************** +vector CreateDatabaseCommand::getShared(){ + try { + InputData input(sharedfile, "sharedfile"); + vector lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + if (label == "") { label = lastLabel; return lookup; } + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set labels; labels.insert(label); + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && (userLabels.size() != 0)) { + if (m->control_pressed) { return lookup; } + + if(labels.count(lookup[0]->getLabel()) == 1){ + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + break; + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + break; + } + + lastLabel = lookup[0]->getLabel(); + + //get next line to process + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(); + } + + + if (m->control_pressed) { return lookup; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + } + + return lookup; + } + catch(exception& e) { + m->errorOut(e, "CreateDatabaseCommand", "getShared"); + exit(1); + } +} + +//**********************************************************************************************************************