X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=createdatabasecommand.cpp;h=bc17323faa481ee15db2835aeca435bc1aaf693b;hb=a89c6295ae9a35fcaaab7fa50dcb68360dd543b0;hp=58799e7a33e840580654d006b7a085ec02e7c5f5;hpb=ac663461b19ad1436a06aa63f97221d1ff105482;p=mothur.git diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp index 58799e7..bc17323 100644 --- a/createdatabasecommand.cpp +++ b/createdatabasecommand.cpp @@ -16,7 +16,7 @@ vector CreateDatabaseCommand::setParameters(){ CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); - CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pcontaxonomy); + CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pconstaxonomy); CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); @@ -37,11 +37,11 @@ string CreateDatabaseCommand::getHelpString(){ try { string helpString = ""; helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, or count file and creates a database file.\n"; - helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group, count and label. List, repfasta, repnames or count, and contaxonomy are required.\n"; + helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List, repfasta, repnames or count, and constaxonomy are required.\n"; helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n"; - helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n"; + helpString += "The constaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n"; helpString += "The group file is optional and will just give you the abundance breakdown by group.\n"; helpString += "The label parameter allows you to specify a label to be used from your listfile.\n"; helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n"; @@ -133,12 +133,12 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option) { if (path == "") { parameters["repname"] = inputDir + it->second; } } - it = parameters.find("contaxonomy"); + it = parameters.find("constaxonomy"); //user has given a template file if(it != parameters.end()){ path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["contaxonomy"] = inputDir + it->second; } + if (path == "") { parameters["constaxonomy"] = inputDir + it->second; } } it = parameters.find("repfasta"); @@ -208,9 +208,9 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option) { if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } } else { if (outputDir == "") { outputDir = m->hasPath(listfile); } } - contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true); + contaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true); if (contaxonomyfile == "not found") { //if there is a current list file, use it - contaxonomyfile = ""; m->mothurOut("The contaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true; + contaxonomyfile = ""; m->mothurOut("The constaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true; } else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; } @@ -298,7 +298,7 @@ int CreateDatabaseCommand::execute(){ } repNames = tempRepNames; }else { - ct.readTable(countfile, true); + ct.readTable(countfile, true, false); numUniqueNamesFile = ct.getNumUniqueSeqs(); nameMap = ct.getNameMap(); } @@ -364,11 +364,15 @@ int CreateDatabaseCommand::execute(){ header += "repSeqName\trepSeq\tOTUConTaxonomy"; out << header << endl; + vector binLabels = list->getLabels(); for (int i = 0; i < list->getNumBins(); i++) { + int index = findIndex(otuLabels, binLabels[i]); + if (index == -1) { m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } + if (m->control_pressed) { break; } - out << otuLabels[i] << '\t'; + out << otuLabels[index] << '\t'; vector binNames; string bin = list->get(i); @@ -380,19 +384,19 @@ int CreateDatabaseCommand::execute(){ if (countfile == "") { sort(binNames.begin(), binNames.end()); bin = ""; - for (int i = 0; i < binNames.size()-1; i++) { - bin += binNames[i] + ','; + for (int j = 0; j < binNames.size()-1; j++) { + bin += binNames[j] + ','; } bin += binNames[binNames.size()-1]; map::iterator it = repNames.find(bin); if (it == repNames.end()) { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; }else { seqRepName = it->second; numSeqsRep = binNames.size(); } //sanity check - if (binNames.size() != classifyOtuSizes[i]) { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + if (binNames.size() != classifyOtuSizes[index]) { + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; } }else { //find rep sequence in bin @@ -406,11 +410,11 @@ int CreateDatabaseCommand::execute(){ } if (seqRepName == "") { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; } if (numSeqsRep != classifyOtuSizes[i]) { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; } } @@ -443,7 +447,7 @@ int CreateDatabaseCommand::execute(){ }else { out << numSeqsRep << '\t'; } //output repSeq - out << seqRepName << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl; + out << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; } @@ -462,8 +466,8 @@ int CreateDatabaseCommand::execute(){ if (m->control_pressed) { break; } - int index = findIndex(otuLabels, m->currentBinLabels[h]); - if (index == -1) { m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } + int index = findIndex(otuLabels, m->currentSharedBinLabels[h]); + if (index == -1) { m->mothurOut("[ERROR]: " + m->currentSharedBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } if (m->control_pressed) { break; } @@ -478,7 +482,7 @@ int CreateDatabaseCommand::execute(){ //sanity check if (totalAbund != classifyOtuSizes[index]) { - m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break; + m->mothurOut("[WARNING]: OTU " + m->currentSharedBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break; } //output repSeq @@ -506,7 +510,7 @@ int CreateDatabaseCommand::findIndex(vector& otuLabels, string label){ try { int index = -1; for (int i = 0; i < otuLabels.size(); i++) { - if (otuLabels[i] == label) { index = i; break; } + if (m->isLabelEquivalent(otuLabels[i],label)) { index = i; break; } } return index; } @@ -577,7 +581,7 @@ vector CreateDatabaseCommand::readFasta(vector& seqs){ int binNumber = 0; string temp = ""; for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } } - m->mothurConvert(temp, binNumber); + m->mothurConvert(m->getSimpleLabel(temp), binNumber); set::iterator it = sanity.find(binNumber); if (it != sanity.end()) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break; @@ -743,7 +747,7 @@ vector CreateDatabaseCommand::getShared(){ return lookup; } catch(exception& e) { - m->errorOut(e, "CreateDatabaseCommand", "getList"); + m->errorOut(e, "CreateDatabaseCommand", "getShared"); exit(1); } }