5 * Created by westcott on 6/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "countseqscommand.h"
11 #include "sharedutilities.h"
12 #include "counttable.h"
13 #include "inputdata.h"
15 //**********************************************************************************************************************
// Registers the parameters accepted by count.seqs: each CommandParameter is
// constructed and appended to the `parameters` member, then every registered
// parameter name is copied into a vector<string>.
// NOTE(review): the try/return/catch scaffolding of this function is not visible in
// this view; presumably myArray is the value returned to the caller - confirm.
16 vector<string> CountSeqsCommand::setParameters(){
// Input file parameters: shared, name and group.
18 CommandParameter pshared("shared", "InputTypes", "", "", "NameSHared-sharedGroup", "NameSHared", "none","count",false,false,true); parameters.push_back(pshared);
19 CommandParameter pname("name", "InputTypes", "", "", "NameSHared", "NameSHared", "none","count",false,false,true); parameters.push_back(pname);
20 CommandParameter pgroup("group", "InputTypes", "", "", "sharedGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
// Option parameters. NOTE(review): "1" and "F" look like the default values for
// processors and large - confirm against the CommandParameter constructor.
21 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
22 CommandParameter plarge("large", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plarge);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect the parameter names in the order they were registered above.
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error path: report the exception together with the class and method name.
32 m->errorOut(e, "CountSeqsCommand", "setParameters");
36 //**********************************************************************************************************************
// Assembles the user-facing help text for count.seqs (also callable as make.table)
// by appending one sentence per line to helpString.
// NOTE(review): the sentence about the groups parameter refers to a ".count.summary"
// file, while the first sentence says the command outputs a ".count_table" file;
// this looks like a typo in the help text - confirm and correct the string if so.
37 string CountSeqsCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The count.seqs aka. make.table command reads a name or shared file and outputs a .count_table file. You may also provide a group with the names file to get the counts broken down by group.\n";
41 helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n";
42 helpString += "The large parameter indicates the name and group files are too large to fit in RAM.\n";
43 helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n";
44 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
45 helpString += "The count.seqs command should be in the following format: count.seqs(name=yourNameFile).\n";
46 helpString += "Example count.seqs(name=amazon.names) or make.table(name=amazon.names).\n";
47 helpString += "Note: No spaces between parameter labels (i.e. name), '=' and parameters (i.e.yourNameFile).\n";
// Error path: report the exception together with the class and method name.
51 m->errorOut(e, "CountSeqsCommand", "getHelpString");
55 //**********************************************************************************************************************
// Selects the output file name pattern for the requested output type.
// Only "count" is defined here; its pattern offers two forms separated by '-':
// one built from [filename] alone and one that also includes [distance].
// Any other type logs an error and sets m->control_pressed.
// NOTE(review): the declaration and return of `pattern` are not visible in this view.
56 string CountSeqsCommand::getOutputPattern(string type) {
59 if (type == "count") { pattern = "[filename],count_table-[filename],[distance],count_table"; }
60 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Error path: report the exception together with the class and method name.
65 m->errorOut(e, "CountSeqsCommand", "getOutputPattern");
69 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help already called
// (so execute() returns immediately) and registers an empty "count" entry in
// outputTypes.
70 CountSeqsCommand::CountSeqsCommand(){
72 abort = true; calledHelp = true;
// Register the output type this command can produce, with no files yet.
74 vector<string> tempOutNames;
75 outputTypes["count"] = tempOutNames;
// Error path: report the exception together with the class and method name.
78 m->errorOut(e, "CountSeqsCommand", "CountSeqsCommand");
82 //**********************************************************************************************************************
// Constructor taking the raw option string. It handles the "help" and "citation"
// requests, validates the supplied parameter names, resolves the name/shared/group
// input files (prefixing inputdir when no path was given) and reads the groups,
// large, processors and outputdir options. Any validation failure sets `abort`.
// NOTE(review): several structural lines (try, else, closing braces) are not visible
// in this view, so the exact nesting of the statements below must be confirmed.
84 CountSeqsCommand::CountSeqsCommand(string option) {
86 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Parse the option string into name/value pairs and fetch the accepted names.
93 vector<string> myArray = setParameters();
95 OptionParser parser(option);
96 map<string,string> parameters = parser.getParameters();
98 ValidParameters validParameter;
99 map<string,string>::iterator it;
101 //check to make sure all parameters are valid for command
102 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
103 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
106 //initialize outputTypes
107 vector<string> tempOutNames;
108 outputTypes["count"] = tempOutNames;
111 //if the user changes the input directory command factory will send this info to us in the output parameter
112 string inputDir = validParameter.validFile(parameters, "inputdir", false);
113 if (inputDir == "not found"){ inputDir = ""; }
// NOTE(review): `path` is assigned below but its declaration is not visible here.
116 it = parameters.find("name");
117 //user has given a name file
118 if(it != parameters.end()){
119 path = m->hasPath(it->second);
120 //if the user has not given a path then, add inputdir. else leave path alone.
121 if (path == "") { parameters["name"] = inputDir + it->second; }
124 it = parameters.find("group");
125 //user has given a group file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["group"] = inputDir + it->second; }
132 it = parameters.find("shared");
133 //user has given a shared file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["shared"] = inputDir + it->second; }
141 //check for required parameters
// For each input file: "not open" aborts, "not found" means the parameter was not
// supplied, and any other value is recorded as the current file of that type.
142 namefile = validParameter.validFile(parameters, "name", true);
143 if (namefile == "not open") { namefile = ""; abort = true; }
144 else if (namefile == "not found"){ namefile = ""; }
145 else { m->setNameFile(namefile); }
147 sharedfile = validParameter.validFile(parameters, "shared", true);
148 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
149 else if (sharedfile == "not found"){ sharedfile = ""; }
150 else { m->setSharedFile(sharedfile); }
// NOTE(review): unlike namefile and sharedfile above, groupfile is NOT reset to ""
// in the "not open" case, so it keeps the literal value "not open" (abort is set).
152 groupfile = validParameter.validFile(parameters, "group", true);
153 if (groupfile == "not open") { abort = true; }
154 else if (groupfile == "not found") { groupfile = ""; }
155 else { m->setGroupFile(groupfile); }
// Neither name nor shared was supplied: fall back to the current name file, then the
// current shared file. NOTE(review): the else branches are not visible here;
// presumably the error on the last line only fires when both fallbacks are empty.
157 if ((namefile == "") && (sharedfile == "")) {
158 namefile = m->getNameFile();
159 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
161 sharedfile = m->getSharedFile();
162 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
164 m->mothurOut("You have no current namefile or sharedfile and the name or shared parameter is required."); m->mothurOutEndLine(); abort = true;
// groups defaults to "all"; the dash-separated value is split into Groups and
// also registered through m->setGroups.
169 groups = validParameter.validFile(parameters, "groups", false);
170 if (groups == "not found") { groups = "all"; }
171 m->splitAtDash(groups, Groups);
172 m->setGroups(Groups);
// large defaults to "F".
174 string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
175 large = m->isTrue(temp);
// processors defaults to the value reported by m->getProcessors(); the chosen value
// is stored back with m->setProcessors and converted into the processors member.
177 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
178 m->setProcessors(temp);
179 m->mothurConvert(temp, processors);
181 //if the user changes the output directory command factory will send this info to us in the output parameter
182 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
// Error path: report the exception together with the class and method name.
187 catch(exception& e) {
188 m->errorOut(e, "CountSeqsCommand", "CountSeqsCommand");
192 //**********************************************************************************************************************
// Runs the command. When a name file is set, one count table is built from it
// (processSmall or processLarge depending on `large`). The code from the
// outputDir/sharedfile lines onward instead reads the shared file and writes one
// count table per processed label via processShared. Finally the first "count"
// output is recorded as the current count table file and the output names are printed.
// Returns 0 after help was called or when control_pressed is seen, 2 when the
// command was aborted for another reason.
// NOTE(review): the else that separates the name-file path from the shared-file path,
// and other structural lines, are not visible in this view.
// NOTE(review): no visible line in this file assigns `allLines` or `labels`; confirm
// they are initialized before the shared-file loop reads them.
194 int CountSeqsCommand::execute(){
197 if (abort == true) { if (calledHelp) { return 0; } return 2; }
199 map<string, string> variables;
// ---- name file input ----
201 if (namefile != "") {
203 int start = time(NULL);
// Default the output directory to the directory of the name file.
204 if (outputDir == "") { outputDir = m->hasPath(namefile); }
205 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
206 string outputFileName = getOutputFileName("count", variables);
// NOTE(review): the declaration of `total` is not visible in this view.
208 if (!large) { total = processSmall(outputFileName); }
209 else { total = processLarge(outputFileName); }
// On interruption, remove the partially written table.
211 if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
213 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create a table for " + toString(total) + " sequences.");
214 m->mothurOutEndLine(); m->mothurOutEndLine();
216 m->mothurOutEndLine();
217 m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
// ---- shared file input ----
// Default the output directory to the directory of the shared file.
220 if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
221 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
// NOTE(review): lookup[0] is dereferenced right after the first read with no visible
// check that the vector is non-empty - confirm getSharedRAbundVectors guarantees it.
223 InputData input(sharedfile, "sharedfile");
224 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
225 string lastLabel = lookup[0]->getLabel();
227 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
228 set<string> processedLabels;
229 set<string> userLabels = labels;
231 //as long as you are not at the end of the file or done with the lines you want
232 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// On interruption, free the current vectors and remove every file written so far.
234 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
// This label was requested (or all labels are wanted): process it.
236 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
238 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
240 processShared(lookup, variables);
242 processedLabels.insert(lookup[0]->getLabel());
243 userLabels.erase(lookup[0]->getLabel());
// m->anyLabelsToProcess reports labels still pending relative to the current one and
// the previous label has not been processed: re-read and process the previous label.
246 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
247 string saveLabel = lookup[0]->getLabel();
249 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
250 lookup = input.getSharedRAbundVectors(lastLabel);
251 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
253 processShared(lookup, variables);
255 processedLabels.insert(lookup[0]->getLabel());
256 userLabels.erase(lookup[0]->getLabel());
258 //restore real lastlabel to save below
259 lookup[0]->setLabel(saveLabel);
262 lastLabel = lookup[0]->getLabel();
263 //prevent memory leak
264 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
266 if (m->control_pressed) { return 0; }
268 //get next line to process
269 lookup = input.getSharedRAbundVectors();
272 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
274 //output error messages about any remaining user labels
// NOTE(review): needToRun starts false and no visible line sets it to true;
// presumably that happens in the "I will use" branch below - confirm.
275 set<string>::iterator it;
276 bool needToRun = false;
277 for (it = userLabels.begin(); it != userLabels.end(); it++) {
278 m->mothurOut("Your file does not include the label " + *it);
279 if (processedLabels.count(lastLabel) != 1) {
280 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
283 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
287 //run last label if you need to
288 if (needToRun == true) {
289 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
290 lookup = input.getSharedRAbundVectors(lastLabel);
292 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
294 processShared(lookup, variables);
296 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
301 //set the first count table written as the new current count table file
302 itTypes = outputTypes.find("count");
303 if (itTypes != outputTypes.end()) {
304 if ((itTypes->second).size() != 0) { string current = (itTypes->second)[0]; m->setCountTableFile(current); }
// List every output file produced by this run.
307 m->mothurOutEndLine();
308 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
309 for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
310 m->mothurOutEndLine();
// Error path: report the exception together with the class and method name.
315 catch(exception& e) {
316 m->errorOut(e, "CountSeqsCommand", "execute");
320 //**********************************************************************************************************************
// Writes one count table for the label held by `lookup`.
//   lookup    - one SharedRAbundVector per group, all for the same label.
//   variables - file name variables; taken by value, so the "[distance]" entry set
//               here does not leak back to the caller.
// The output file name is recorded in outputNames and outputTypes["count"].
// The table has a header row (OTU_Label, total, then each group name) followed by
// one row per bin with the bin label, the summed abundance and each group's abundance.
// NOTE(review): the declarations of `out`, `total` and `output`, and the return
// statement, are not visible in this view.
322 int CountSeqsCommand::processShared(vector<SharedRAbundVector*>& lookup, map<string, string> variables){
324 variables["[distance]"] = lookup[0]->getLabel();
325 string outputFileName = getOutputFileName("count", variables);
326 outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName);
329 m->openOutputFile(outputFileName, out);
// Header row.
331 out << "OTU_Label\ttotal\t";
332 for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; } out << endl;
// One row per bin; the bin count is taken from the first group's vector.
334 for (int j = 0; j < lookup[0]->getNumBins(); j++) {
335 if (m->control_pressed) { break; }
// Accumulate the row total and the tab-separated per-group abundances.
339 for (int i = 0; i < lookup.size(); i++) {
340 total += lookup[i]->getAbundance(j);
341 output += toString(lookup[i]->getAbundance(j)) + '\t';
// The bin label comes from m->currentSharedBinLabels, indexed by bin number.
343 out << m->currentSharedBinLabels[j] << '\t' << total << '\t' << output << endl;
// Error path: report the exception together with the class and method name.
349 catch(exception& e) {
350 m->errorOut(e, "CountSeqsCommand", "processShared");
354 //**********************************************************************************************************************
// Builds the count table from the name file (and optional group file) for the
// non-"large" case: writes the header row to outputFileName, then delegates the
// per-sequence rows to createProcesses and keeps its result in `total`.
// NOTE(review): the declarations of `out` and `groupMap`, the closing of the header
// line and the return statement are not visible in this view.
356 int CountSeqsCommand::processSmall(string outputFileName){
// NOTE(review): outputTypes["count"] receives outputFileName on BOTH of the next two
// lines, so the same file is recorded twice for the "count" type - likely an
// accidental duplicate; confirm and drop one push_back.
359 m->openOutputFile(outputFileName, out); outputTypes["count"].push_back(outputFileName);
360 outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName);
361 out << "Representative_Sequence\ttotal\t";
// NOTE(review): groupMap is only assigned when a group file is given but is passed to
// createProcesses unconditionally below; confirm its (hidden) declaration leaves it in
// a safe state for the no-group case.
364 if (groupfile != "") {
365 groupMap = new GroupMap(groupfile); groupMap->readMap();
367 //make sure groups are valid. takes care of user setting groupNames that are invalid or setting groups=all
368 SharedUtil* util = new SharedUtil();
369 vector<string> nameGroups = groupMap->getNamesOfGroups();
370 util->setGroups(Groups, nameGroups);
373 //sort groupNames so that the group titles match the counts below, this is needed because the map object automatically sorts
374 sort(Groups.begin(), Groups.end());
// Append one header column per selected group.
377 for (int i = 0; i < Groups.size(); i++) {
378 out << Groups[i] << '\t';
// Write the data rows (possibly split across several processes).
384 int total = createProcesses(groupMap, outputFileName);
386 if (groupfile != "") { delete groupMap; }
// Error path: report the exception together with the class and method name.
390 catch(exception& e) {
391 m->errorOut(e, "CountSeqsCommand", "processSmall");
395 /**************************************************************************************************/
// Splits the name file into ranges and runs driver() on each range, then appends
// the per-worker temporary files to outputFileName.
//   groupMap       - group lookup handed to driver().
//   outputFileName - count table that already holds the header row.
// On Unix-like platforms the visible code tracks worker ids in processIDS and
// exchanges per-worker sequence counts through "<id>.num.temp" files; on other
// platforms it uses CreateThread with one countData per worker thread.
// When a group file is in use, the summed sequence count is compared with
// groupMap->getNumSeqs() and a mismatch is reported.
// NOTE(review): many lines (the fork call, #else/#endif, declarations of process,
// numSeqs, pid, filename, the return) are not visible in this view; the comments
// below describe only what the visible lines do.
396 int CountSeqsCommand::createProcesses(GroupMap*& groupMap, string outputFileName) {
399 vector<int> processIDS;
401 vector<unsigned long long> positions;
402 vector<linePair> lines;
// Unix-like platforms: split by file position, one [start, end) pair per range.
// NOTE(review): positions.size() is unsigned, so positions.size()-1 wraps to a huge
// value if divideFilePerLine ever returns an empty vector - confirm it cannot.
405 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
406 positions = m->divideFilePerLine(namefile, processors);
407 for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
// NOTE(review): presumably the lines below belong to the #else (non-Unix) branch,
// where each linePair holds a start position and a number of sequences - confirm.
409 if(processors == 1){ lines.push_back(linePair(0, 1000)); }
412 positions = m->setFilePosEachLine(namefile, numSeqs);
413 if (positions.size() < processors) { processors = positions.size(); }
415 //figure out how many sequences you have to process
416 int numSeqsPerProcessor = numSeqs / processors;
417 for (int i = 0; i < processors; i++) {
418 int startIndex = i * numSeqsPerProcessor;
// The last worker also takes the remainder of the division.
419 if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; }
420 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
426 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
428 //loop through and create all the processes you want
429 while (process != processors-1) {
433 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
// Worker: write its rows to "<id>.temp" and its sequence count to "<id>.num.temp".
436 string filename = m->mothurGetpid(process) + ".temp";
437 numSeqs = driver(lines[process].start, lines[process].end, filename, groupMap);
439 string tempFile = m->mothurGetpid(process) + ".num.temp";
441 m->openOutputFile(tempFile, outTemp);
443 outTemp << numSeqs << endl;
// Spawn failure: report it and signal the workers already started.
448 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
449 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
// The calling process handles the last range itself.
454 string filename = m->mothurGetpid(process) + ".temp";
455 numSeqs = driver(lines[processors-1].start, lines[processors-1].end, filename, groupMap);
457 //force parent to wait until all the processes are done
458 for (int i=0;i<processIDS.size();i++) {
459 int temp = processIDS[i];
// Read back each worker's sequence count and delete its count file.
// NOTE(review): the file name here is built with toString(processIDS[i]) while the
// worker builds it with m->mothurGetpid(process); confirm both yield the same name.
463 for (int i = 0; i < processIDS.size(); i++) {
464 string tempFile = toString(processIDS[i]) + ".num.temp";
466 m->openInputFile(tempFile, intemp);
469 intemp >> num; intemp.close();
471 m->mothurRemove(tempFile);
// Thread-based variant (uses DWORD/HANDLE/CreateThread).
// NOTE(review): the two arrays below are sized by the runtime value processors-1;
// variable-length arrays are not standard C++ and the size is 0 when processors == 1.
474 vector<countData*> pDataArray;
475 DWORD dwThreadIdArray[processors-1];
476 HANDLE hThreadArray[processors-1];
477 vector<GroupMap*> copies;
479 //Create processor worker threads.
480 for( int i=0; i<processors-1; i++ ){
481 string filename = toString(i) + ".temp";
// Each thread receives its own GroupMap copy and its own copy of Groups.
483 GroupMap* copyGroup = new GroupMap();
484 copyGroup->getCopy(groupMap);
485 copies.push_back(copyGroup);
486 vector<string> cGroups = Groups;
488 countData* temp = new countData(filename, copyGroup, m, lines[i].start, lines[i].end, groupfile, namefile, cGroups);
489 pDataArray.push_back(temp);
490 processIDS.push_back(i);
492 hThreadArray[i] = CreateThread(NULL, 0, MyCountThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
// The calling thread handles the last range itself.
495 string filename = toString(processors-1) + ".temp";
496 numSeqs = driver(lines[processors-1].start, lines[processors-1].end, filename, groupMap);
498 //Wait until all threads have terminated.
499 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
501 //Close all thread handles and free memory allocations.
// NOTE(review): the GroupMap copies stored in `copies` have no visible delete here.
502 for(int i=0; i < pDataArray.size(); i++){
503 numSeqs += pDataArray[i]->total;
505 CloseHandle(hThreadArray[i]);
506 delete pDataArray[i];
510 //append output files
// Worker files are appended in processIDS order, then the caller's own file.
511 for(int i=0;i<processIDS.size();i++){
512 m->appendFiles((toString(processIDS[i]) + ".temp"), outputFileName);
513 m->mothurRemove((toString(processIDS[i]) + ".temp"));
515 m->appendFiles(filename, outputFileName);
516 m->mothurRemove(filename);
// Sanity check against the group file. Note that control_pressed is only set in the
// multi-processor case; with one processor the mismatch is reported but not fatal.
520 if (groupfile != "") {
521 if (numSeqs != groupMap->getNumSeqs()) {
522 m->mothurOut("[ERROR]: processes reported processing " + toString(numSeqs) + " sequences, but group file indicates you have " + toString(groupMap->getNumSeqs()) + " sequences.");
523 if (processors == 1) { m->mothurOut(" Could you have a file mismatch?\n"); }
524 else { m->mothurOut(" Either you have a file mismatch or a process failed to complete the task assigned to it.\n"); m->control_pressed = true; }
// Error path: report the exception together with the class and method name.
529 catch(exception& e) {
530 m->errorOut(e, "CountSeqsCommand", "createProcesses");
534 /**************************************************************************************************/
// Processes one range of the name file and writes the matching count table rows
// to outputFileName.
//   start, end     - range to process; on Unix-like platforms `end` is compared with
//                    the stream position after each record.
//   outputFileName - temporary file receiving this range's rows.
//   groupMap       - used to look up each sequence's group when a group file is set.
// Each record has two columns: the representative name and a comma-separated list
// of names. Without a group file the row is "<name> <number of names>".
// NOTE(review): the seek to `start`, the read loop header, the per-group increment,
// the declaration of `total` and the return are not visible in this view.
535 int CountSeqsCommand::driver(unsigned long long start, unsigned long long end, string outputFileName, GroupMap*& groupMap) {
539 m->openOutputFile(outputFileName, out);
542 m->openInputFile(namefile, in);
548 if (m->control_pressed) { break; }
// Read one record: representative name, then the comma-separated name list.
550 string firstCol, secondCol;
551 in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
552 //cout << firstCol << '\t' << secondCol << endl;
553 m->checkName(firstCol);
554 m->checkName(secondCol);
555 //cout << firstCol << '\t' << secondCol << endl;
557 vector<string> names;
558 m->splitAtChar(secondCol, names, ',');
560 if (groupfile != "") {
// Start every selected group at zero so each row has a column for every group.
562 map<string, int> groupCounts;
564 for (int i = 0; i < Groups.size(); i++) { groupCounts[Groups[i]] = 0; }
566 //get counts for each of the users groups
567 for (int i = 0; i < names.size(); i++) {
568 string group = groupMap->getGroup(names[i]);
// NOTE(review): a name missing from the group file is only logged here; no visible
// line sets control_pressed, so processing appears to continue - confirm intent.
570 if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); }
572 map<string, int>::iterator it = groupCounts.find(group);
574 //if not found, then this sequence is not from a group we care about
575 if (it != groupCounts.end()) {
// Row: name, total, then the per-group counts in map (sorted key) order.
583 out << firstCol << '\t' << total << '\t';
584 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
585 out << it->second << '\t';
// No group file: the row is the name and the number of names it represents.
590 out << firstCol << '\t' << names.size() << endl;
593 total += names.size();
// Stop at the end of this range.
// NOTE(review): pos is unsigned, so (pos == -1) relies on -1 converting to the
// largest unsigned value, which is what a failed tellg() becomes after conversion.
595 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
596 unsigned long long pos = in.tellg();
597 if ((pos == -1) || (pos >= end)) { break; }
599 if (in.eof()) { break; }
// Error path: report the exception together with the class and method name.
609 catch(exception& e) {
610 m->errorOut(e, "CountSeqsCommand", "driver");
614 //**********************************************************************************************************************
// Builds the count table for the "large" case.
// Without a group file, each name file record becomes "<name> <number of names>".
// With a group file, the name and group files are first rewritten to temporary
// indexed files (processNameFile / getGroupNames), sorted with the external `sort`
// program, then read in parallel to fill a [unique sequence][group] count matrix
// that is written out at the end.
// NOTE(review): stream declarations, the read loop header, else branches, the total
// and the return are not visible in this view.
616 int CountSeqsCommand::processLarge(string outputFileName){
// NOTE(review): namesOfGroups has just been declared and is empty, so the loop below
// adds nothing to `initial`, and `initial` is not used by any visible line - this
// looks like dead code; confirm.
618 set<string> namesOfGroups;
619 map<string, int> initial;
620 for (set<string>::iterator it = namesOfGroups.begin(); it != namesOfGroups.end(); it++) { initial[(*it)] = 0; }
622 m->openOutputFile(outputFileName, out);
623 outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName);
624 out << "Representative_Sequence\ttotal\t";
// Without a group file the header ends here; otherwise group names are added later.
625 if (groupfile == "") { out << endl; }
627 map<string, unsigned long long> namesToIndex;
// Names of the sorted temporary files (outfile is computed even without a group file).
628 string outfile = m->getRootName(groupfile) + "sorted.groups.temp";
629 string outName = m->getRootName(namefile) + "sorted.name.temp";
630 map<int, string> indexToName;
631 map<int, string> indexToGroup;
632 if (groupfile != "") {
633 time_t estart = time(NULL);
634 //convert name file to redundant -> unique. set unique name equal to index so we can use vectors, save name for later.
635 string newNameFile = m->getRootName(namefile) + ".name.temp";
636 string newGroupFile = m->getRootName(groupfile) + ".group.temp";
637 indexToName = processNameFile(newNameFile);
638 indexToGroup = getGroupNames(newGroupFile, namesOfGroups);
640 //sort file by first column so the names of sequences will be easier to find
// NOTE(review): the commands below are built by plain string concatenation and run
// through system(); file names containing spaces or shell metacharacters will break
// the command or be interpreted by the shell, and the return value of system() is
// ignored, so a failed sort goes unnoticed.
642 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
643 string command = "sort -n " + newGroupFile + " -o " + outfile;
644 system(command.c_str());
645 command = "sort -n " + newNameFile + " -o " + outName;
646 system(command.c_str());
647 #else //sort using windows sort
648 string command = "sort " + newGroupFile + " /O " + outfile;
649 system(command.c_str());
650 command = "sort " + newNameFile + " /O " + outName;
651 system(command.c_str());
// The unsorted intermediates are no longer needed.
653 m->mothurRemove(newNameFile);
654 m->mothurRemove(newGroupFile);
656 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort and index the group and name files. "); m->mothurOutEndLine();
// No group file: read the original name file directly.
657 }else { outName = namefile; }
659 time_t estart = time(NULL);
662 m->openInputFile(outName, in);
// Count matrix: one row per unique sequence index, one column per group index.
668 vector< vector<int> > nameMapCount;
669 if (groupfile != "") {
670 m->openInputFile(outfile, in2);
671 nameMapCount.resize(indexToName.size());
672 for (int i = 0; i < nameMapCount.size(); i++) {
673 nameMapCount[i].resize(indexToGroup.size(), 0);
678 if (m->control_pressed) { break; }
681 in >> firstCol; m->gobble(in);
683 if (groupfile != "") {
// Sorted name file record: <sequence name> <unique index>.
685 in >> uniqueIndex; m->gobble(in);
// Sorted group file record: <sequence name> <group index>. Both files are read in
// step, so the names must match record for record.
687 string name; int groupIndex;
688 in2 >> name >> groupIndex; m->gobble(in2);
690 if (name != firstCol) { m->mothurOut("[ERROR]: found " + name + " in your groupfile, but " + firstCol + " was in your namefile, please correct.\n"); m->control_pressed = true; }
// NOTE(review): the indices are read from the temporary files and used without a
// visible bounds check.
692 nameMapCount[uniqueIndex][groupIndex]++;
// No group file: write the name and the number of names in the second column.
696 in >> secondCol; m->gobble(in);
697 int num = m->getNumNames(secondCol);
698 out << firstCol << '\t' << num << endl;
704 if (groupfile != "") {
705 m->mothurRemove(outfile);
706 m->mothurRemove(outName);
// Finish the header with the group names in index order, then write one row per
// unique sequence: name, total across groups, per-group counts.
708 for (map<int, string>::iterator it = indexToGroup.begin(); it != indexToGroup.end(); it++) { out << it->second << '\t'; }
710 for (int i = 0; i < nameMapCount.size(); i++) {
711 string totalsLine = "";
713 for (int j = 0; j < nameMapCount[i].size(); j++) {
714 seqTotal += nameMapCount[i][j];
715 totalsLine += toString(nameMapCount[i][j]) + '\t';
717 out << indexToName[i] << '\t' << seqTotal << '\t' << totalsLine << endl;
723 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to create the count table file. "); m->mothurOutEndLine();
// Error path: report the exception together with the class and method name.
727 catch(exception& e) {
728 m->errorOut(e, "CountSeqsCommand", "processLarge");
732 /**************************************************************************************************/
// Rewrites the name file into the file `name` as one "<sequence name> <index>" line
// per name listed in the second column, and records index -> representative name
// in indexToNames.
// The name file is read in 4096-byte chunks; whitespace-separated pieces alternate
// between column one and column two, and a record is handled once both are read.
// NOTE(review): the declarations of out, in, rest, buffer and count, the loop header,
// the pairDone checks, the increment of count and the return are not visible in this
// view; presumably count advances once per record and indexToNames is returned.
// NOTE(review): the record-handling code appears twice (once for chunk pieces, once
// for the leftover text in `rest`); the two copies must be kept in sync.
733 map<int, string> CountSeqsCommand::processNameFile(string name) {
735 map<int, string> indexToNames;
738 m->openOutputFile(name, out);
742 m->openInputFile(namefile, in);
746 bool pairDone = false;
747 bool columnOne = true;
748 string firstCol, secondCol;
752 if (m->control_pressed) { break; }
// Read the next chunk; `rest` carries text left over between calls.
754 in.read(buffer, 4096);
755 vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
757 for (int i = 0; i < pieces.size(); i++) {
758 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
759 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
762 m->checkName(firstCol);
763 m->checkName(secondCol);
764 //parse names into vector
765 vector<string> theseNames;
766 m->splitAtComma(secondCol, theseNames);
// Every name in the record is written with the record's index.
767 for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; }
768 indexToNames[count] = firstCol;
// Handle whatever text is still held in `rest`.
778 vector<string> pieces = m->splitWhiteSpace(rest);
780 for (int i = 0; i < pieces.size(); i++) {
781 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
782 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
785 m->checkName(firstCol);
786 m->checkName(secondCol);
787 //parse names into vector
788 vector<string> theseNames;
789 m->splitAtComma(secondCol, theseNames);
790 for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; }
791 indexToNames[count] = firstCol;
// Error path: report the exception together with the class and method name.
802 catch(exception& e) {
803 m->errorOut(e, "CountSeqsCommand", "processNameFile");
807 /**************************************************************************************************/
// Rewrites the group file into `filename` as "<sequence name> <group index>" lines.
//   filename      - temporary output file to write.
//   namesOfGroups - receives every group name seen in the group file.
// Returns a map from group index to group name, built by inverting groupIndex.
// The group file is read in 4096-byte chunks; whitespace-separated pieces alternate
// between the sequence name (column one) and the group name (column two).
// NOTE(review): the declarations of out, in, rest, buffer and count, the loop header,
// the pairDone checks and the increment of count are not visible in this view;
// presumably count advances each time a new group is added.
// NOTE(review): the record-handling code appears twice (once for chunk pieces, once
// for the leftover text in `rest`); the two copies must be kept in sync.
808 map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& namesOfGroups) {
810 map<int, string> indexToGroups;
811 map<string, int> groupIndex;
812 map<string, int>::iterator it;
815 m->openOutputFile(filename, out);
819 m->openInputFile(groupfile, in);
823 bool pairDone = false;
824 bool columnOne = true;
825 string firstCol, secondCol;
829 if (m->control_pressed) { break; }
// Read the next chunk; `rest` carries text left over between calls.
831 in.read(buffer, 4096);
832 vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
834 for (int i = 0; i < pieces.size(); i++) {
835 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
836 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
839 m->checkName(firstCol);
840 it = groupIndex.find(secondCol);
841 if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
842 groupIndex[secondCol] = count;
// Write the sequence name with its group's index and remember the group name.
845 out << firstCol << '\t' << groupIndex[secondCol] << endl;
846 namesOfGroups.insert(secondCol);
// Handle whatever text is still held in `rest`.
855 vector<string> pieces = m->splitWhiteSpace(rest);
857 for (int i = 0; i < pieces.size(); i++) {
858 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
859 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
862 m->checkName(firstCol);
863 it = groupIndex.find(secondCol);
864 if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
865 groupIndex[secondCol] = count;
868 out << firstCol << '\t' << groupIndex[secondCol] << endl;
869 namesOfGroups.insert(secondCol);
// Invert name -> index into index -> name for the caller.
876 for (it = groupIndex.begin(); it != groupIndex.end(); it++) { indexToGroups[it->second] = it->first; }
878 return indexToGroups;
// Error path: report the exception together with the class and method name.
880 catch(exception& e) {
881 m->errorOut(e, "CountSeqsCommand", "getGroupNames");
885 //**********************************************************************************************************************