if(globaldata->gListVector != NULL) {
listOfNames = new ListVector(*globaldata->gListVector);
+ vector<string> names;
+ string binnames;
//map names to rows in sparsematrix
for (int i = 0; i < listOfNames->size(); i++) {
- nameToIndex[listOfNames->get(i)] = i;
+ names.clear();
+ binnames = listOfNames->get(i);
+ splitAtComma(binnames, names);
+
+ for (int j = 0; j < names.size(); j++) {
+ nameToIndex[names[j]] = i;
+ }
}
- }else { cout << "error" << endl; }
+ }else { cout << "error, no listvector." << endl; }
fastafile = globaldata->getFastaFile();
namesfile = globaldata->getNameFile();
+ groupfile = globaldata->getGroupFile();
+
+ if (groupfile != "") {
+ //read in group map info.
+ groupMap = new GroupMap(groupfile);
+ groupMap->readMap();
+ }
+
openInputFile(fastafile, in);
fasta = new FastaMap();
delete input;
delete read;
delete fasta;
+ if (groupfile != "") {
+ delete groupMap;
+ }
}
//**********************************************************************************************************************
}
//read list file
- read = new ReadPhilFile(globaldata->getListFile());
+ read = new ReadOTUFile(globaldata->getListFile());
read->read(&*globaldata);
input = globaldata->ginput;
//for each bin in the list vector
for (int i = 0; i < list->size(); i++) {
- nameRep = FindRep(i);
+ string groups;
+ nameRep = FindRep(i, groups);
//print out name and sequence for that bin
sequence = fasta->getSequence(nameRep);
if (sequence != "not found") {
- nameRep = nameRep + "|" + toString(i+1);
- out << ">" << nameRep << endl;
- out << sequence << endl;
+ if (groupfile == "") {
+ nameRep = nameRep + "|" + toString(i+1);
+ out << ">" << nameRep << endl;
+ out << sequence << endl;
+ }else {
+ nameRep = nameRep + "|" + groups + "|" + toString(i+1);
+ out << ">" << nameRep << endl;
+ out << sequence << endl;
+ }
}else {
cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl;
remove(outputFileName.c_str());
}
}
//**********************************************************************************************************************
-string GetOTURepCommand::FindRep(int bin) {
+string GetOTURepCommand::FindRep(int bin, string& group) {
try{
vector<string> names;
map<string, float> sums;
string binnames;
float min = 10000;
string minName;
+ map<string, string> groups;
+ map<string, string>::iterator groupIt;
binnames = list->get(bin);
-
+
//parse names into vector
splitAtComma(binnames, names);
+ //if you have a groupfile
+ if(groupfile != "") {
+ //find the groups that are in this bin
+ for (int i = 0; i < names.size(); i++) {
+ string groupName = groupMap->getGroup(names[i]);
+ if (groupName == "not found") {
+ cout << names[i] << " is missing from your group file. Please correct. " << endl;
+ groupError = true;
+ }else{
+ groups[groupName] = groupName;
+ }
+ }
+
+ //turn the groups into a string
+ for(groupIt = groups.begin(); groupIt != groups.end(); groupIt++) { group += groupIt->first + "-"; }
+
+ //rip off last dash
+ group = group.substr(0, group.length()-1);
+ }
+
//if only 1 sequence in bin then that's the rep
if (names.size() == 1) { return names[0]; }
else {
//fill binMap
for (int i = 0; i < names.size(); i++) {
for (it3 = nameToIndex.begin(); it3 != nameToIndex.end(); it3++) {
+
if (it3->first == names[i]) {
binMap[it3->second] = it3->first;