5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Fills `types` with the tags of the "current" settings (file-type tags plus
// "processors"); the declared return type is set<string>, so `types` is presumably
// what gets returned - the return statement is not visible in this excerpt.
// NOTE(review): no insert is visible here for "list", "name", "sff", "tree", "flow" or
// "biom", although printCurrentFiles() prints all of them - confirm they are covered.
// Exceptions are reported through errorOut().
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
48 types.insert("processors");
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
// Prints one "tag=value" line, via mothurOut() followed by mothurOutEndLine(), for every
// current-file member that is not the empty string. "processors" is printed only when it
// differs from "1". Exceptions are reported through errorOut().
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
// Reports whether any current setting is in use: returns true as soon as one of the
// file-name members is non-empty or processors is not "1".
// NOTE(review): hasCurrent is initialised to false and never reassigned in the visible
// lines; presumably it is what gets returned when no test matches - confirm.
// Exceptions are reported through errorOut().
94 bool MothurOut::hasCurrentFiles() {
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): the statements of this function are not visible in this excerpt; judging
// by the name it presumably resets the current-file members - confirm against the full file.
// Exceptions are reported through errorOut().
133 void MothurOut::clearCurrentFiles() {
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
// Searches the directories listed in the PATH environment variable for programName and
// builds the result in pPath.
// Two searches are visible: (1) a directory whose lower-cased name contains programName,
//                           (2) a directory in which "<dir><separator><programName>" can
//                               be opened as a file.
// The condition that chooses between the two is not visible in this excerpt.
// Exceptions are reported through errorOut().
166 string MothurOut::findProgramPath(string programName){
// NOTE(review): getenv() returns NULL when PATH is not set, and constructing a
// std::string from a null pointer is undefined behaviour.
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
195 //is this mothurs path?
// Only the directory name is lower-cased; programName is searched for as given, so a
// programName containing upper-case letters cannot match here.
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
// The three-argument openInputFile() is used as an existence probe for the candidate.
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Stores `filename` in logFileName and opens it on the member stream `out` through
// openOutputFile().
// NOTE(review): the MPI rank query and the pid == 0 guard are presumably compiled only in
// MPI builds; the surrounding preprocessor lines are not visible here - confirm.
// Exceptions are reported through errorOut().
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
// Stores `pathname` in defaultPath after making sure it ends with a path separator
// ("/" in the Unix/Apple branch, "\\" in the other branch).
// Exceptions are reported through errorOut().
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
// NOTE(review): for an empty pathname, length()-1 wraps around and substr() throws
// std::out_of_range; no emptiness check is visible in this excerpt - confirm.
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
// Stores `pathname` unchanged in outputDir (no trailing separator is added here, unlike
// setDefaultPath). Exceptions are reported through errorOut().
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// NOTE(review): only the MPI rank query and the pid == 0 guard are visible in this excerpt;
// judging by the name the guarded code presumably closes the log stream - confirm.
// Exceptions are reported through errorOut().
294 void MothurOut::closeLog() {
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. NOTE(review): its statements are not visible in this excerpt.
// Exceptions are reported through errorOut() with "MothurOut" as the function name.
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Outputs `output`. NOTE(review): the statements that do the writing are not visible in
// this excerpt; per the inline comment only the process with pid == 0 writes - confirm
// whether that guard applies outside MPI builds.
// Exceptions are reported through errorOut() with "MothurOut" as the function name.
328 void MothurOut::mothurOut(string output) {
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Ends the current output line. NOTE(review): the statements that do the writing are not
// visible in this excerpt; per the inline comment only the process with pid == 0 writes.
// Exceptions are reported through errorOut().
351 void MothurOut::mothurOutEndLine() {
355 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
357 if (pid == 0) { //only one process should output to screen
367 catch(exception& e) {
368 errorOut(e, "MothurOut", "MothurOutEndLine");
372 /*********************************************************************************************/
// Overload that also streams `output` into the caller-supplied outputFile.
// NOTE(review): any other destinations written by this overload are not visible in this
// excerpt; per the inline comment only the process with pid == 0 writes.
// Exceptions are reported through errorOut() with "MothurOut" as the function name.
373 void MothurOut::mothurOut(string output, ofstream& outputFile) {
378 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
380 if (pid == 0) { //only one process should output to screen
385 outputFile << output;
393 catch(exception& e) {
394 errorOut(e, "MothurOut", "MothurOut");
398 /*********************************************************************************************/
// Overload of mothurOutEndLine() that takes a caller-supplied outputFile.
// NOTE(review): the statements that do the writing are not visible in this excerpt; per
// the inline comment only the process with pid == 0 writes.
// Exceptions are reported through errorOut().
399 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
403 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
405 if (pid == 0) { //only one process should output to screen
416 catch(exception& e) {
417 errorOut(e, "MothurOut", "MothurOutEndLine");
421 /*********************************************************************************************/
// NOTE(review): the statements that do the writing are not visible in this excerpt;
// judging by the name `output` presumably goes to the log file only - confirm.
// Exceptions are reported through errorOut().
422 void MothurOut::mothurOutJustToLog(string output) {
426 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
428 if (pid == 0) { //only one process should output to screen
437 catch(exception& e) {
438 errorOut(e, "MothurOut", "MothurOutJustToLog");
442 /*********************************************************************************************/
443 void MothurOut::errorOut(exception& e, string object, string function) {
445 //mem_usage(vm, rss);
447 string errorType = toString(e.what());
449 int pos = errorType.find("bad_alloc");
450 mothurOut("[ERROR]: ");
451 mothurOut(errorType);
453 if (pos == string::npos) { //not bad_alloc
454 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
457 if (object == "cluster"){
458 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
459 }else if (object == "shhh.flows"){
460 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
462 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
466 /*********************************************************************************************/
467 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
468 // mem_usage(double &, double &) - takes two doubles by reference,
469 // attempts to read the system-dependent data for a process' virtual memory
470 // size and resident set size, and return the results in KB.
472 // On failure, returns 0.0, 0.0
// Reads this process's virtual-memory size and resident set size from /proc/self/stat,
// stores them in vm_usage and resident_set, and prints both through mothurOut().
//   vm_usage     - receives vsize / 1024.0
//   resident_set - receives rss * (page size / 1024)
// The reading code sits inside the Unix/Apple #if below; the Windows variant further down
// is commented out. The return statement is not visible in this excerpt.
473 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
474 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
479 // 'file' stat seems to give the most reliable results
// NOTE(review): no check of stat_stream's state is visible before the values are used -
// confirm what happens on systems without /proc/self/stat.
481 ifstream stat_stream("/proc/self/stat",ios_base::in);
483 // dummy vars for leading entries in stat that we don't care about
485 string pid, comm, state, ppid, pgrp, session, tty_nr;
486 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
487 string utime, stime, cutime, cstime, priority, nice;
488 string O, itrealvalue, starttime;
490 // the two fields we want
// The 22 leading fields are read into throw-away strings so that the next two
// extractions land on vsize and rss.
495 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
496 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
497 >> utime >> stime >> cutime >> cstime >> priority >> nice
498 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
500 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
501 vm_usage = vsize / 1024.0;
502 resident_set = rss * page_size_kb;
504 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
508 /* //windows memory usage
509 // Get the list of process identifiers.
510 DWORD aProcesses[1024], cbNeeded, cProcesses;
512 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
514 // Calculate how many process identifiers were returned.
515 cProcesses = cbNeeded / sizeof(DWORD);
517 // Print the memory usage for each process
518 for (int i = 0; i < cProcesses; i++ ) {
519 DWORD processID = aProcesses[i];
521 PROCESS_MEMORY_COUNTERS pmc;
523 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
525 // Print the process identifier.
526 printf( "\nProcess ID: %u\n", processID);
528 if (NULL != hProcess) {
530 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
531 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
532 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
533 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
534 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
535 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
536 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
537 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
538 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
539 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
541 CloseHandle(hProcess);
551 /***********************************************************************/
// Opens fileName on fileHandle in append mode (ios::app) after expanding the name with
// getFullPathName(). A "[ERROR]: Could not open ..." line is printed; the condition that
// guards it and the returned values are not visible in this excerpt.
// Exceptions are reported through errorOut().
552 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
554 fileName = getFullPathName(fileName);
556 fileHandle.open(fileName.c_str(), ios::app);
558 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
565 catch(exception& e) {
566 errorOut(e, "MothurOut", "openOutputFileAppend");
570 /***********************************************************************/
// Skips leading whitespace on f: characters are consumed while isspace() accepts them, and
// the first character that is not whitespace is put back unless end-of-file was reached.
// Exceptions are reported through errorOut().
571 void MothurOut::gobble(istream& f){
575 while(isspace(d=f.get())) { ;}
576 if(!f.eof()) { f.putback(d); }
578 catch(exception& e) {
579 errorOut(e, "MothurOut", "gobble");
583 /***********************************************************************/
// istringstream overload of gobble(): consumes characters while isspace() accepts them and
// puts the first non-whitespace character back unless end-of-file was reached.
// Exceptions are reported through errorOut().
584 void MothurOut::gobble(istringstream& f){
587 while(isspace(d=f.get())) {;}
588 if(!f.eof()) { f.putback(d); }
590 catch(exception& e) {
591 errorOut(e, "MothurOut", "gobble");
596 /***********************************************************************/
// Reads characters from fileHandle until end-of-file or until a '\n', '\r' or '\f' is read.
// NOTE(review): the statements that collect the characters and return the line are not
// visible in this excerpt. get() is called after the eof() test, so the last iteration can
// read the end-of-file marker into c - confirm how that value is handled.
// Exceptions are reported through errorOut().
598 string MothurOut::getline(istringstream& fileHandle) {
603 while (!fileHandle.eof()) {
605 char c = fileHandle.get();
607 //are you at the end of the line
608 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
615 catch(exception& e) {
616 errorOut(e, "MothurOut", "getline");
620 /***********************************************************************/
// ifstream overload: reads characters until '\n', '\r', '\f' or EOF is seen.
// NOTE(review): the loop header and the statements that collect and return the line are not
// visible in this excerpt.
// Exceptions are reported through errorOut().
622 string MothurOut::getline(ifstream& fileHandle) {
// NOTE(review): get() returns int but the value is stored in a char. Where char is signed
// a 0xFF data byte compares equal to EOF; where char is unsigned the c == EOF test below
// can never be true.
629 char c = fileHandle.get();
631 //are you at the end of the line
632 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
639 catch(exception& e) {
640 errorOut(e, "MothurOut", "getline");
644 /***********************************************************************/
646 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
647 #ifdef USE_COMPRESSION
// Returns true when `s` ends with the NUL-terminated string `suffix`.
// An empty suffix matches every string; a suffix longer than `s` never matches.
inline bool endsWith(string s, const char * suffix){
	const size_t suffixLength = strlen(suffix);
	if (s.size() < suffixLength) { return false; }

	// compare the last suffixLength characters of s against suffix in place
	return s.compare(s.size() - suffixLength, suffixLength, suffix) == 0;
}
// Computes the root of a file name: everything up to and including the last '.', e.g.
// "dir/seqs.fasta" -> "dir/seqs.". A name without '.' is left unchanged.
// On Unix/Apple builds with USE_COMPRESSION a trailing ".gz" or ".bz2" is removed first
// (and the shortening is announced on cerr).
// The return statement is not visible in this excerpt; presumably rootName is returned.
// Exceptions are reported through errorOut().
655 string MothurOut::getRootName(string longName){
658 string rootName = longName;
660 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
661 #ifdef USE_COMPRESSION
662 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
663 int pos = rootName.find_last_of('.');
664 rootName = rootName.substr(0, pos);
665 cerr << "shortening " << longName << " to " << rootName << "\n";
669 if(rootName.find_last_of(".") != rootName.npos){
// +1 keeps the '.' itself at the end of the root name
670 int pos = rootName.find_last_of('.')+1;
671 rootName = rootName.substr(0, pos);
676 catch(exception& e) {
677 errorOut(e, "MothurOut", "getRootName");
681 /***********************************************************************/
// Strips the directory part from longName: simpleName becomes the text after the last '/'
// or '\\', or the whole of longName when neither separator occurs.
// The return statement is not visible in this excerpt; presumably simpleName is returned.
// Exceptions are reported through errorOut().
683 string MothurOut::getSimpleName(string longName){
685 string simpleName = longName;
688 found=longName.find_last_of("/\\");
690 if(found != longName.npos){
691 simpleName = longName.substr(found+1);
696 catch(exception& e) {
697 errorOut(e, "MothurOut", "getSimpleName");
702 /***********************************************************************/
// Draws a pseudo-random index from rand(): the value is scaled by
// (highest+1) / (RAND_MAX+1) and truncated to int, so results are intended to fall in
// 0..highest. The arithmetic is done through float casts.
// The return statement is not visible in this excerpt; presumably `random` is returned.
// Exceptions are reported through errorOut().
704 int MothurOut::getRandomIndex(int highest){
707 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
711 catch(exception& e) {
712 errorOut(e, "MothurOut", "getRandomIndex");
717 /**********************************************************************/
// Computes the directory part of longName: everything up to and including the last '/' or
// '\\'. When neither separator occurs rootPathName stays equal to longName.
// The return statement is not visible in this excerpt; presumably rootPathName is returned.
// Exceptions are reported through errorOut().
719 string MothurOut::getPathName(string longName){
721 string rootPathName = longName;
723 if(longName.find_last_of("/\\") != longName.npos){
// +1 keeps the separator at the end of the path
724 int pos = longName.find_last_of("/\\")+1;
725 rootPathName = longName.substr(0, pos);
730 catch(exception& e) {
731 errorOut(e, "MothurOut", "getPathName");
736 /***********************************************************************/
// Checks that dirName can be written to by opening, and later removing, a scratch file
// named "<dirName><tag>temp" inside it.
// dirName is modified in place: a trailing separator is appended when missing and the
// name is expanded with getFullPathName().
// The condition guarding the "does not exist or is not writable" message and the returned
// values are not visible in this excerpt.
// Exceptions are reported through errorOut().
738 bool MothurOut::dirCheck(string& dirName){
744 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
749 //add / to name if needed
// NOTE(review): for an empty dirName, length()-1 wraps around and substr() throws
// std::out_of_range; no emptiness check is visible in this excerpt - confirm.
750 string lastChar = dirName.substr(dirName.length()-1);
751 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
752 if (lastChar != "/") { dirName += "/"; }
754 if (lastChar != "\\") { dirName += "\\"; }
757 //test to make sure directory exists
758 dirName = getFullPathName(dirName);
759 string outTemp = dirName + tag + "temp";
761 out.open(outTemp.c_str(), ios::trunc);
763 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
766 mothurRemove(outTemp);
772 catch(exception& e) {
773 errorOut(e, "MothurOut", "dirCheck");
778 /***********************************************************************/
// Extracts the path prefix of longName: everything up to and including the last '~', '/'
// or '\\'. getFullPathName() treats an empty result as "no path".
// The declaration of `path` and the return statement are not visible in this excerpt.
// Exceptions are reported through errorOut().
780 string MothurOut::hasPath(string longName){
785 found=longName.find_last_of("~/\\");
787 if(found != longName.npos){
788 path = longName.substr(0, found+1);
793 catch(exception& e) {
794 errorOut(e, "MothurOut", "hasPath");
799 /***********************************************************************/
// Extracts the extension of longName, including the leading '.', e.g. "a.fasta" ->
// ".fasta". `extension` stays "" when longName contains no '.'.
// The return statement is not visible in this excerpt; presumably `extension` is returned.
// Exceptions are reported through errorOut().
801 string MothurOut::getExtension(string longName){
803 string extension = "";
805 if(longName.find_last_of('.') != longName.npos){
806 int pos = longName.find_last_of('.');
// the count argument exceeds what is left; substr() clamps it to the end of the string
807 extension = longName.substr(pos, longName.length());
812 catch(exception& e) {
813 errorOut(e, "MothurOut", "getExtension");
817 /***********************************************************************/
// Opens fileName (expanded with getFullPathName()) and returns true, after closing the
// stream, when the stream is already at end-of-file.
// NOTE(review): the statement between open() and the eof() test that would set the eof
// flag (for example a whitespace skip) is not visible in this excerpt, nor are the guard
// on the "Could not open" message and the other return values - confirm.
// Exceptions are reported through errorOut().
818 bool MothurOut::isBlank(string fileName){
821 fileName = getFullPathName(fileName);
824 fileHandle.open(fileName.c_str());
826 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
829 //check for blank file
831 if (fileHandle.eof()) { fileHandle.close(); return true; }
836 catch(exception& e) {
837 errorOut(e, "MothurOut", "isBlank");
841 /***********************************************************************/
// Expands fileName into a complete path.
//   - hasPath() returns "": fileName is returned unchanged.
//   - the path contains '~': the text after '~' is appended to the home directory
//     (HOME in the Unix/Apple branch, HOMEPATH in the other branch).
//   - the path contains "./" (".\\" in the other branch): the leading "./" and "../"
//     components are resolved against the current working directory; without such a
//     component fileName is returned unchanged.
// If a component cannot be interpreted, "cannot resolve path for ..." is printed and
// fileName is returned unchanged.
// Several declarations, the parent-directory step inside the while loops and the final
// return are not visible in this excerpt.
// Exceptions are reported through errorOut().
843 string MothurOut::getFullPathName(string fileName){
846 string path = hasPath(fileName);
850 if (path == "") { return fileName; } //its a simple name
851 else { //we need to complete the pathname
852 // ex. ../../../filename
853 // cwd = /user/work/desktop
856 //get current working directory
857 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
859 if (path.find("~") != -1) { //go to home directory
862 char *homepath = NULL;
863 homepath = getenv ("HOME");
864 if ( homepath != NULL) { homeDir = homepath; }
865 else { homeDir = ""; }
867 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
870 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
871 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
873 //char* cwdpath = new char[1024];
875 //cwdpath=getcwd(cwdpath,size);
878 char *cwdpath = NULL;
// NOTE(review): getcwd(NULL, 0) hands back a buffer the caller owns; no release of
// cwdpath is visible in this excerpt - confirm it is freed.
879 cwdpath = getcwd(NULL, 0); // or _getcwd
880 if ( cwdpath != NULL) { cwd = cwdpath; }
// drops the first character of cwd (the leading '/') before splitting on '/'
886 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
888 //break apart the current working directory
890 while (simpleCWD.find_first_of('/') != string::npos) {
891 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
892 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
895 //get last one // ex. ../../../filename = /user/work/desktop/filename
896 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index marks the deepest cwd component that is kept in the result
899 int index = dirs.size()-1;
901 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
902 if (pos == 0) { break; //you are at the end
903 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
904 path = path.substr(0, pos-1);
906 if (index == 0) { break; }
907 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
908 path = path.substr(0, pos);
909 }else if (pos == 1) { break; //you are at the end
910 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// rebuild the path from the cwd components that are kept, innermost first
913 for (int i = index; i >= 0; i--) {
914 newFileName = dirs[i] + "/" + newFileName;
917 newFileName = "/" + newFileName;
921 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): getenv() returns NULL when HOMEPATH is not set, and constructing a
// std::string from a null pointer is undefined behaviour (the HOME branch checks for NULL).
922 string homeDir = getenv ("HOMEPATH");
923 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
926 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
927 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
929 char *cwdpath = NULL;
930 cwdpath = getcwd(NULL, 0); // or _getcwd
931 if ( cwdpath != NULL) { cwd = cwdpath; }
934 //break apart the current working directory
936 while (cwd.find_first_of('\\') != -1) {
937 string dir = cwd.substr(0,cwd.find_first_of('\\'));
938 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
943 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
945 int index = dirs.size()-1;
947 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
948 if (pos == 0) { break; //you are at the end
949 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
950 path = path.substr(0, pos-1);
952 if (index == 0) { break; }
953 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
954 path = path.substr(0, pos);
955 }else if (pos == 1) { break; //you are at the end
956 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
959 for (int i = index; i >= 0; i--) {
960 newFileName = dirs[i] + "\\" + newFileName;
969 catch(exception& e) {
970 errorOut(e, "MothurOut", "getFullPathName");
974 /***********************************************************************/
// Quiet variant of openInputFile(): opens fileName (expanded with getFullPathName()) on
// fileHandle. The "Could not open" message is commented out here. The third parameter `m`
// is not used in the visible lines; callers in this file pass "" or "no error".
// On Unix/Apple builds with USE_COMPRESSION, a ".gz"/".bz2" file is decompressed by a
// forked child running zcat/bzcat into a named pipe, and the pipe is opened instead.
// The returned values are not visible in this excerpt; callers here treat 0 as "opened".
// Exceptions are reported through errorOut().
976 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
979 string completeFileName = getFullPathName(fileName);
980 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
981 #ifdef USE_COMPRESSION
982 // check for gzipped or bzipped file
983 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; another process can create that path
// before mkfifo() runs.
984 string tempName = string(tmpnam(0));
985 mkfifo(tempName.c_str(), 0666);
986 int fork_result = fork();
987 if (fork_result < 0) {
988 cerr << "Error forking.\n";
990 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into a shell command without quoting, so names
// containing spaces or shell metacharacters will not be handled as a single argument.
991 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
992 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
993 system(command.c_str());
994 cerr << "Done decompressing " << completeFileName << "\n";
995 mothurRemove(tempName);
998 cerr << "waiting on child process " << fork_result << "\n";
999 completeFileName = tempName;
1004 fileHandle.open(completeFileName.c_str());
1006 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1009 //check for blank file
1014 catch(exception& e) {
1015 errorOut(e, "MothurOut", "openInputFile - no Error");
1019 /***********************************************************************/
// Opens fileName (expanded with getFullPathName()) on fileHandle and reports problems:
// a "[ERROR]: Could not open ..." line, and a "... is blank. Please correct." line when
// the stream is at end-of-file.
// On Unix/Apple builds with USE_COMPRESSION, a ".gz"/".bz2" file is decompressed by a
// forked child running zcat/bzcat into a named pipe, and the pipe is opened instead.
// The guard on the "Could not open" message and the returned values are not visible in
// this excerpt.
// Exceptions are reported through errorOut().
1021 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1024 //get full path name
1025 string completeFileName = getFullPathName(fileName);
1026 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1027 #ifdef USE_COMPRESSION
1028 // check for gzipped or bzipped file
1029 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; another process can create that path
// before mkfifo() runs.
1030 string tempName = string(tmpnam(0));
1031 mkfifo(tempName.c_str(), 0666);
1032 int fork_result = fork();
1033 if (fork_result < 0) {
1034 cerr << "Error forking.\n";
1036 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into a shell command without quoting.
1037 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1038 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1039 system(command.c_str());
1040 cerr << "Done decompressing " << completeFileName << "\n";
1041 mothurRemove(tempName);
1044 cerr << "waiting on child process " << fork_result << "\n";
1045 completeFileName = tempName;
1051 fileHandle.open(completeFileName.c_str());
1053 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1057 //check for blank file
1059 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1064 catch(exception& e) {
1065 errorOut(e, "MothurOut", "openInputFile");
1069 /***********************************************************************/
// Renames oldName to newName, replacing an existing newName. Returns 0 immediately when
// both names are equal.
// Unix/Apple branch: an existing newName is deleted with "rm", then "mv" is run.
// Other branch: newName is removed with mothurRemove() and rename() is called.
// The remaining return statements are not visible in this excerpt.
// Exceptions are reported through errorOut().
1071 int MothurOut::renameFile(string oldName, string newName){
1074 if (oldName == newName) { return 0; }
// probe for an existing target using the quiet openInputFile() overload
1077 int exist = openInputFile(newName, inTest, "");
1080 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1081 if (exist == 0) { //you could open it so you want to delete it
// NOTE(review): file names are pasted into shell commands without quoting, so names with
// spaces or shell metacharacters will not be handled as single arguments.
1082 string command = "rm " + newName;
1083 system(command.c_str());
1086 string command = "mv " + oldName + " " + newName;
1087 system(command.c_str());
1089 mothurRemove(newName);
1090 int renameOk = rename(oldName.c_str(), newName.c_str());
1095 catch(exception& e) {
1096 errorOut(e, "MothurOut", "renameFile");
1101 /***********************************************************************/
// Opens fileName (expanded with getFullPathName()) on fileHandle for writing, truncating
// existing content (ios::trunc).
// On Unix/Apple builds with USE_COMPRESSION, a ".gz"/".bz2" target is written through a
// named pipe that a forked child compresses with gzip/bzip2 into the real file.
// The guard on the "Could not open" message and the returned values are not visible in
// this excerpt.
// Exceptions are reported through errorOut().
1103 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1106 string completeFileName = getFullPathName(fileName);
1107 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1108 #ifdef USE_COMPRESSION
1109 // check for gzipped file
1110 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; another process can create that path
// before mkfifo() runs.
1111 string tempName = string(tmpnam(0));
1112 mkfifo(tempName.c_str(), 0666);
1113 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1114 int fork_result = fork();
1115 if (fork_result < 0) {
1116 cerr << "Error forking.\n";
1118 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into a shell command without quoting.
1119 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1120 system(command.c_str());
1123 completeFileName = tempName;
1128 fileHandle.open(completeFileName.c_str(), ios::trunc);
1130 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1137 catch(exception& e) {
1138 errorOut(e, "MothurOut", "openOutputFile");
1144 /**************************************************************************************************/
// Appends the content of file `temp` to file `filename`, copying in reads of up to 4096
// bytes and counting the '\n' characters copied in numLines.
// `temp` is opened with the quiet openInputFile() overload; nothing is copied unless that
// call returns 0.
// The declarations and the return statement are not visible in this excerpt; presumably
// numLines is returned.
// Exceptions are reported through errorOut().
1145 int MothurOut::appendFiles(string temp, string filename) {
1150 //open output file in append mode
1151 openOutputFileAppend(filename, output);
1152 int ableToOpen = openInputFile(temp, input, "no error");
1153 //int ableToOpen = openInputFile(temp, input);
1156 if (ableToOpen == 0) { //you opened it
1159 while (!input.eof()) {
1160 input.read(buffer, 4096);
// gcount() is the number of bytes the last read() delivered, which may be under 4096
1161 output.write(buffer, input.gcount());
1162 //count number of lines
1163 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1172 catch(exception& e) {
1173 errorOut(e, "MothurOut", "appendFiles");
1177 /**************************************************************************************************/
1178 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1183 //open output file in append mode
1184 openOutputFileAppend(filename, output);
1185 int ableToOpen = openInputFile(temp, input, "no error");
1186 //int ableToOpen = openInputFile(temp, input);
1189 if (ableToOpen == 0) { //you opened it
1191 string headers = getline(input); gobble(input);
1192 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1195 while (!input.eof()) {
1196 input.read(buffer, 4096);
1197 output.write(buffer, input.gcount());
1198 //count number of lines
1199 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1208 catch(exception& e) {
1209 errorOut(e, "MothurOut", "appendFiles");
1213 /**************************************************************************************************/
// Sorts a three-column distance file (name, name, distance) by its distance column and writes the
// result to getRootName(distFile) + "sorted.dist".
// NOTE(review): outputDir is not used by any of the live statements below (its only use is commented out).
1214 string MothurOut::sortFile(string distFile, string outputDir){
1217 //if (outputDir == "") { outputDir += hasPath(distFile); }
1218 string outfile = getRootName(distFile) + "sorted.dist";
1221 //if you can, use the unix sort since its been optimized for years
1222 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// NOTE(review): file names are pasted into the shell command unquoted; paths with spaces will break it
1223 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1224 system(command.c_str());
1225 #else //you are stuck with my best attempt...
1226 //windows sort does not have a way to specify a column, only a character in the line
1227 //since we cannot assume that the distance will always be at the same character location on each line
1228 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1230 //read in file line by line and put distance first
1231 string tempDistFile = distFile + ".temp";
1234 openInputFile(distFile, input);
1235 openOutputFile(tempDistFile, output);
1237 string firstName, secondName;
1239 while (!input.eof()) {
1240 input >> firstName >> secondName >> dist;
1241 output << dist << '\t' << firstName << '\t' << secondName << endl;
1248 //sort using windows sort
1249 string tempOutfile = outfile + ".temp";
1250 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1251 system(command.c_str());
1253 //read in sorted file and put distance at end again
1256 openInputFile(tempOutfile, input2);
1257 openOutputFile(outfile, output2);
1259 while (!input2.eof()) {
1260 input2 >> dist >> firstName >> secondName;
1261 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
// both intermediate files are deleted once the final output has been written
1268 mothurRemove(tempDistFile);
1269 mothurRemove(tempOutfile);
1274 catch(exception& e) {
1275 errorOut(e, "MothurOut", "sortFile");
1279 /**************************************************************************************************/
// Scans filename one byte at a time (binary mode) and collects byte offsets in positions.
// num receives the number of offsets collected; the file size in bytes is then appended as a
// final entry.
// Presumably the recorded offsets are the '>' characters that start FASTA records - TODO confirm.
1280 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1282 vector<unsigned long long> positions;
1284 //openInputFile(filename, inFASTA);
1285 inFASTA.open(filename.c_str(), ios::binary);
1288 unsigned long long count = 0;
1289 while(!inFASTA.eof()){
1290 //input = getline(inFASTA);
1291 //cout << input << '\t' << inFASTA.tellg() << endl;
1292 //if (input.length() != 0) {
1293 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1295 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
// count is the number of bytes consumed so far, so count-1 is the offset of the byte just read
1296 char c = inFASTA.get(); count++;
1298 positions.push_back(count-1);
1299 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
1304 num = positions.size();
1305 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
// NOTE(review): size is declared without an initial value; if fopen() fails it may still be
// unset when it is pushed below - confirm
1307 unsigned long long size;
1309 //get num bytes in file
1310 pFile = fopen (filename.c_str(),"rb");
1311 if (pFile==NULL) perror ("Error opening file");
1313 fseek (pFile, 0, SEEK_END);
1318 /*unsigned long long size = positions[(positions.size()-1)];
1320 openInputFile(filename, in);
1325 if(in.eof()) { break; }
1330 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1332 positions.push_back(size);
1337 catch(exception& e) {
1338 errorOut(e, "MothurOut", "setFilePosFasta");
1342 /**************************************************************************************************/
1343 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1345 filename = getFullPathName(filename);
1347 vector<unsigned long long> positions;
1349 //openInputFile(filename, in);
1350 in.open(filename.c_str(), ios::binary);
1353 unsigned long long count = 0;
1354 positions.push_back(0);
1357 //getline counting reads
1358 char d = in.get(); count++;
1359 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
1360 //get next character
1366 d=in.get(); count++;
1367 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
1369 positions.push_back(count-1);
1370 //cout << count-1 << endl;
1374 num = positions.size()-1;
1377 unsigned long long size;
1379 //get num bytes in file
1380 pFile = fopen (filename.c_str(),"rb");
1381 if (pFile==NULL) perror ("Error opening file");
1383 fseek (pFile, 0, SEEK_END);
1388 positions[(positions.size()-1)] = size;
1392 catch(exception& e) {
1393 errorOut(e, "MothurOut", "setFilePosEachLine");
1397 /**************************************************************************************************/
// Splits filename into roughly equal byte ranges, one per process, moving each boundary forward
// to the next '>' so that a range starts at a sequence record. filePos starts with 0 and ends
// with the file size; proc is updated to the number of ranges actually produced.
1399 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1401 vector<unsigned long long> filePos;
1402 filePos.push_back(0);
// NOTE(review): size has no initial value; if fopen() fails it may be read unset below - confirm
1405 unsigned long long size;
1407 filename = getFullPathName(filename);
1409 //get num bytes in file
1410 pFile = fopen (filename.c_str(),"rb");
1411 if (pFile==NULL) perror ("Error opening file");
1413 fseek (pFile, 0, SEEK_END);
1418 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1420 //estimate file breaks
1421 unsigned long long chunkSize = 0;
// NOTE(review): proc == 0 would divide by zero here
1422 chunkSize = size / proc;
1424 //file too small to divide by processors
1425 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1427 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1428 for (int i = 0; i < proc; i++) {
1429 unsigned long long spot = (i+1) * chunkSize;
1432 openInputFile(filename, in);
1436 unsigned long long newSpot = spot;
// the '>' is put back before tellg() so that newSpot is the offset of the '>' itself
1440 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
// NOTE(review): treating int(c) == -1 as end of file depends on char being signed and also matches a 0xFF byte
1441 else if (int(c) == -1) { break; }
1445 //there was not another sequence before the end of the file
1446 unsigned long long sanityPos = in.tellg();
// tellg() reports failure as -1; the comparison relies on -1 converting to the largest unsigned value
1448 if (sanityPos == -1) { break; }
1449 else { filePos.push_back(newSpot); }
1455 filePos.push_back(size);
1457 //sanity check filePos
// drop any boundary that does not lie strictly after its predecessor; i-- re-checks the same slot after an erase
1458 for (int i = 0; i < (filePos.size()-1); i++) {
1459 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1462 proc = (filePos.size() - 1);
1464 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1466 filePos.push_back(size);
1470 catch(exception& e) {
1471 errorOut(e, "MothurOut", "divideFile");
1475 /**************************************************************************************************/
1477 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1479 vector<unsigned long long> filePos;
1480 filePos.push_back(0);
1483 unsigned long long size;
1485 filename = getFullPathName(filename);
1487 //get num bytes in file
1488 pFile = fopen (filename.c_str(),"rb");
1489 if (pFile==NULL) perror ("Error opening file");
1491 fseek (pFile, 0, SEEK_END);
1496 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1498 //estimate file breaks
1499 unsigned long long chunkSize = 0;
1500 chunkSize = size / proc;
1502 //file to small to divide by processors
1503 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1505 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1506 for (int i = 0; i < proc; i++) {
1507 unsigned long long spot = (i+1) * chunkSize;
1510 openInputFile(filename, in);
1513 //look for next line break
1514 unsigned long long newSpot = spot;
1518 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1519 else if (int(c) == -1) { break; }
1522 //there was not another line before the end of the file
1523 unsigned long long sanityPos = in.tellg();
1525 if (sanityPos == -1) { break; }
1526 else { filePos.push_back(newSpot); }
1532 filePos.push_back(size);
1534 //sanity check filePos
1535 for (int i = 0; i < (filePos.size()-1); i++) {
1536 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1539 proc = (filePos.size() - 1);
1541 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1543 filePos.push_back(size);
1547 catch(exception& e) {
1548 errorOut(e, "MothurOut", "divideFile");
1552 /**************************************************************************************************/
1553 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1556 vector<unsigned long long> filePos = divideFile(filename, proc);
1558 for (int i = 0; i < (filePos.size()-1); i++) {
1562 openInputFile(filename, in);
1563 in.seekg(filePos[i]);
1564 unsigned long long size = filePos[(i+1)] - filePos[i];
1565 char* chunk = new char[size];
1566 in.read(chunk, size);
1570 string fileChunkName = filename + "." + toString(i) + ".tmp";
1572 openOutputFile(fileChunkName, out);
1574 out << chunk << endl;
1579 files.push_back(fileChunkName);
1584 catch(exception& e) {
1585 errorOut(e, "MothurOut", "divideFile");
1589 /***********************************************************************/
1591 bool MothurOut::isTrue(string f){
1594 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1596 if ((f == "TRUE") || (f == "T")) { return true; }
1597 else { return false; }
1599 catch(exception& e) {
1600 errorOut(e, "MothurOut", "isTrue");
1605 /***********************************************************************/
1607 float MothurOut::roundDist(float dist, int precision){
1609 return int(dist * precision + 0.5)/float(precision);
1611 catch(exception& e) {
1612 errorOut(e, "MothurOut", "roundDist");
1616 /***********************************************************************/
1618 float MothurOut::ceilDist(float dist, int precision){
1620 return int(ceil(dist * precision))/float(precision);
1622 catch(exception& e) {
1623 errorOut(e, "MothurOut", "ceilDist");
1627 /***********************************************************************/
// Tokenises the first size bytes of buffer on whitespace.
// rest is in/out: on entry it holds the partial token left over from the previous buffer, on
// return it holds the still-unfinished token at the end of this one, so a token that straddles
// two reads is reassembled by the next call.
1629 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1631 vector<string> pieces;
1633 for (int i = 0; i < size; i++) {
1634 if (!isspace(buffer[i])) { rest += buffer[i]; }
// whitespace reached: the token collected so far is complete
1636 if (rest != "") { pieces.push_back(rest); rest = ""; }
1637 while (i < size) { //gobble white space
1638 if (isspace(buffer[i])) { i++; }
// first character of the next token; the enclosing for loop's i++ then moves past it
1639 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1646 catch(exception& e) {
1647 errorOut(e, "MothurOut", "splitWhiteSpace");
1651 /***********************************************************************/
// Splits input into whitespace-separated tokens; runs of whitespace never produce empty tokens.
1652 vector<string> MothurOut::splitWhiteSpace(string input){
1654 vector<string> pieces;
1657 for (int i = 0; i < input.length(); i++) {
1658 if (!isspace(input[i])) { rest += input[i]; }
// whitespace reached: the token collected so far is complete
1660 if (rest != "") { pieces.push_back(rest); rest = ""; }
1661 while (i < input.length()) { //gobble white space
1662 if (isspace(input[i])) { i++; }
// first character of the next token; the enclosing for loop's i++ then moves past it
1663 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// a token that runs to the end of the string still has to be emitted
1668 if (rest != "") { pieces.push_back(rest); }
1672 catch(exception& e) {
1673 errorOut(e, "MothurOut", "splitWhiteSpace");
1677 /***********************************************************************/
// Splits input on whitespace, except that text starting at a ' or " is taken up to the next
// ' or " (or the end of the string) so that quoted text can contain whitespace.
// Falls back to splitWhiteSpace(input) when the string contains no quote character at all.
1678 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1680 vector<string> pieces;
// find() returns string::npos on failure; storing it in an int works only because -1 converts back to npos below
1683 int pos = input.find('\'');
1684 int pos2 = input.find('\"');
1686 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1688 for (int i = 0; i < input.length(); i++) {
// NOTE(review): either quote character closes the quoted text, so 'a"b' is not kept together
1689 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1691 for (int j = i+1; j < input.length(); j++) {
1692 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
1696 }else { rest += input[j]; }
1698 }else if (!isspace(input[i])) { rest += input[i]; }
1700 if (rest != "") { pieces.push_back(rest); rest = ""; }
1701 while (i < input.length()) { //gobble white space
1702 if (isspace(input[i])) { i++; }
1703 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1708 if (rest != "") { pieces.push_back(rest); }
1712 catch(exception& e) {
// NOTE(review): the label passed here is "splitWhiteSpace", not this function's name
1713 errorOut(e, "MothurOut", "splitWhiteSpace");
1717 //**********************************************************************************************************************
1718 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
1722 openInputFile(namefile, in);
1726 bool pairDone = false;
1727 bool columnOne = true;
1728 string firstCol, secondCol;
1731 if (control_pressed) { break; }
1733 in.read(buffer, 4096);
1734 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1736 for (int i = 0; i < pieces.size(); i++) {
1737 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1738 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1741 checkName(firstCol);
1742 //are there confidence scores, if so remove them
1743 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1744 map<string, string>::iterator itTax = taxMap.find(firstCol);
1746 if(itTax == taxMap.end()) {
1747 bool ignore = false;
1748 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1750 if (!ignore) { taxMap[firstCol] = secondCol; }
1751 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1753 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1762 vector<string> pieces = splitWhiteSpace(rest);
1764 for (int i = 0; i < pieces.size(); i++) {
1765 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1766 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1769 checkName(firstCol);
1770 //are there confidence scores, if so remove them
1771 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1772 map<string, string>::iterator itTax = taxMap.find(firstCol);
1774 if(itTax == taxMap.end()) {
1775 bool ignore = false;
1776 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1778 if (!ignore) { taxMap[firstCol] = secondCol; }
1779 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1781 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1789 return taxMap.size();
1792 catch(exception& e) {
1793 errorOut(e, "MothurOut", "readTax");
1797 /**********************************************************************************************************************/
// Reads a two-column names file (representative name, comma-separated member names) and maps
// every member name to its representative. Returns nameMap.size().
// NOTE(review): redund is not referenced by any statement in the body.
1798 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
1802 openInputFile(namefile, in);
1806 bool pairDone = false;
1807 bool columnOne = true;
1808 string firstCol, secondCol;
1811 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
1813 in.read(buffer, 4096);
1814 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column; a pair is complete after the second
1816 for (int i = 0; i < pieces.size(); i++) {
1817 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1818 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1821 checkName(firstCol);
1822 checkName(secondCol);
1824 //parse names into vector
1825 vector<string> theseNames;
1826 splitAtComma(secondCol, theseNames);
1827 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
// same processing for whatever is still held in rest once reading has finished
1835 vector<string> pieces = splitWhiteSpace(rest);
1837 for (int i = 0; i < pieces.size(); i++) {
1838 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1839 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1842 checkName(firstCol);
1843 checkName(secondCol);
1845 //parse names into vector
1846 vector<string> theseNames;
1847 splitAtComma(secondCol, theseNames);
1848 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1854 return nameMap.size();
1857 catch(exception& e) {
1858 errorOut(e, "MothurOut", "readNames");
1862 /**********************************************************************************************************************/
// Reads a two-column file and stores the pairs reversed: nameMap[second column] = first column.
// The second column is used whole as the key (it is not split at commas). Returns nameMap.size().
// NOTE(review): flip is not referenced by any statement in the body.
1863 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
1867 openInputFile(namefile, in);
1871 bool pairDone = false;
1872 bool columnOne = true;
1873 string firstCol, secondCol;
1876 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
1878 in.read(buffer, 4096);
1879 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1881 for (int i = 0; i < pieces.size(); i++) {
1882 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1883 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1886 checkName(firstCol);
1887 checkName(secondCol);
1888 nameMap[secondCol] = firstCol;
// same processing for whatever is still held in rest once reading has finished
1896 vector<string> pieces = splitWhiteSpace(rest);
1898 for (int i = 0; i < pieces.size(); i++) {
1899 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1900 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1903 checkName(firstCol);
1904 checkName(secondCol);
1905 nameMap[secondCol] = firstCol;
1911 return nameMap.size();
1914 catch(exception& e) {
1915 errorOut(e, "MothurOut", "readNames");
1919 /**********************************************************************************************************************/
// Reads a two-column names file into two maps, both of which are cleared first:
//   nameMap[member name]       = representative name (first column)
//   nameCount[representative]  = number of comma-separated names in the second column
// Returns nameMap.size().
1920 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
1922 nameMap.clear(); nameCount.clear();
1925 openInputFile(namefile, in);
1929 bool pairDone = false;
1930 bool columnOne = true;
1931 string firstCol, secondCol;
1934 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
1936 in.read(buffer, 4096);
1937 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1939 for (int i = 0; i < pieces.size(); i++) {
1940 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1941 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1944 checkName(firstCol);
1945 checkName(secondCol);
1946 //parse names into vector
1947 vector<string> theseNames;
1948 splitAtComma(secondCol, theseNames);
1949 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1950 nameCount[firstCol] = theseNames.size();
// same processing for whatever is still held in rest once reading has finished
1958 vector<string> pieces = splitWhiteSpace(rest);
1960 for (int i = 0; i < pieces.size(); i++) {
1961 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1962 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1965 checkName(firstCol);
1966 checkName(secondCol);
1967 //parse names into vector
1968 vector<string> theseNames;
1969 splitAtComma(secondCol, theseNames);
1970 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1971 nameCount[firstCol] = theseNames.size();
1977 return nameMap.size();
1980 catch(exception& e) {
1981 errorOut(e, "MothurOut", "readNames");
1985 /**********************************************************************************************************************/
// Reads a two-column names file and stores nameMap[first column] = second column, with the
// second column kept as the raw string. Returns nameMap.size().
1986 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
1990 openInputFile(namefile, in);
1994 bool pairDone = false;
1995 bool columnOne = true;
1996 string firstCol, secondCol;
1999 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
2001 in.read(buffer, 4096);
2002 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2004 for (int i = 0; i < pieces.size(); i++) {
2005 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2006 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2009 checkName(firstCol);
2010 checkName(secondCol);
2011 nameMap[firstCol] = secondCol; pairDone = false; }
// same processing for whatever is still held in rest once reading has finished
2017 vector<string> pieces = splitWhiteSpace(rest);
2019 for (int i = 0; i < pieces.size(); i++) {
2020 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2021 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2024 checkName(firstCol);
2025 checkName(secondCol);
2026 nameMap[firstCol] = secondCol; pairDone = false; }
2030 return nameMap.size();
2033 catch(exception& e) {
2034 errorOut(e, "MothurOut", "readNames");
2038 /**********************************************************************************************************************/
// Reads a two-column names file and stores, for each first-column name, the vector obtained by
// splitting the second column at commas. Returns nameMap.size().
2039 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2043 openInputFile(namefile, in);
2047 bool pairDone = false;
2048 bool columnOne = true;
2049 string firstCol, secondCol;
2052 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
2054 in.read(buffer, 4096);
2055 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2057 for (int i = 0; i < pieces.size(); i++) {
2058 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2059 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2062 checkName(firstCol);
2063 checkName(secondCol);
2064 vector<string> temp;
2065 splitAtComma(secondCol, temp);
2066 nameMap[firstCol] = temp;
// same processing for whatever is still held in rest once reading has finished
2074 vector<string> pieces = splitWhiteSpace(rest);
2076 for (int i = 0; i < pieces.size(); i++) {
2077 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2078 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2081 checkName(firstCol);
2082 checkName(secondCol);
2083 vector<string> temp;
2084 splitAtComma(secondCol, temp);
2085 nameMap[firstCol] = temp;
2091 return nameMap.size();
2093 catch(exception& e) {
2094 errorOut(e, "MothurOut", "readNames");
2098 /**********************************************************************************************************************/
// Reads a two-column names file and builds a map from each first-column name to
// getNumNames(second column).
2099 map<string, int> MothurOut::readNames(string namefile) {
2101 map<string, int> nameMap;
2105 openInputFile(namefile, in);
2109 bool pairDone = false;
2110 bool columnOne = true;
2111 string firstCol, secondCol;
2114 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
2116 in.read(buffer, 4096);
2117 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2119 for (int i = 0; i < pieces.size(); i++) {
2120 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2121 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2124 checkName(firstCol);
2125 checkName(secondCol);
2126 int num = getNumNames(secondCol);
2127 nameMap[firstCol] = num;
// same processing for whatever is still held in rest once reading has finished
2135 vector<string> pieces = splitWhiteSpace(rest);
2136 for (int i = 0; i < pieces.size(); i++) {
2137 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2138 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2141 checkName(firstCol);
2142 checkName(secondCol);
2143 int num = getNumNames(secondCol);
2144 nameMap[firstCol] = num;
2153 catch(exception& e) {
2154 errorOut(e, "MothurOut", "readNames");
2158 /**********************************************************************************************************************/
// Reads a two-column names file and builds a map from each first-column name to
// getNumNames(second column).
// numSeqs is an out-parameter; presumably it receives the total of those counts - TODO confirm.
2159 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2161 map<string, int> nameMap;
2166 openInputFile(namefile, in);
2170 bool pairDone = false;
2171 bool columnOne = true;
2172 string firstCol, secondCol;
2175 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
2177 in.read(buffer, 4096);
2178 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2180 for (int i = 0; i < pieces.size(); i++) {
2181 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2182 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2185 checkName(firstCol);
2186 checkName(secondCol);
2187 int num = getNumNames(secondCol);
2188 nameMap[firstCol] = num;
// same processing for whatever is still held in rest once reading has finished
2197 vector<string> pieces = splitWhiteSpace(rest);
2198 for (int i = 0; i < pieces.size(); i++) {
2199 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2200 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2203 checkName(firstCol);
2204 checkName(secondCol);
2205 int num = getNumNames(secondCol);
2206 nameMap[firstCol] = num;
2216 catch(exception& e) {
2217 errorOut(e, "MothurOut", "readNames");
2221 /************************************************************/
2222 int MothurOut::checkName(string& name) {
2224 for (int i = 0; i < name.length(); i++) {
2225 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2229 catch(exception& e) {
2230 errorOut(e, "MothurOut", "checkName");
2234 /**********************************************************************************************************************/
// Reads a two-column names file and, for every first-column name that is present in fastamap,
// appends seqPriorityNode(getNumNames(second column), fastamap entry, name) to nameVector.
// A name that is missing from fastamap is reported with an [ERROR] message.
2235 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2241 openInputFile(namefile, in);
2245 bool pairDone = false;
2246 bool columnOne = true;
2247 string firstCol, secondCol;
2250 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
2252 in.read(buffer, 4096);
2253 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2255 for (int i = 0; i < pieces.size(); i++) {
2256 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2257 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2260 checkName(firstCol);
2261 checkName(secondCol);
2262 int num = getNumNames(secondCol);
2264 map<string, string>::iterator it = fastamap.find(firstCol);
2265 if (it == fastamap.end()) {
2267 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2269 seqPriorityNode temp(num, it->second, firstCol);
2270 nameVector.push_back(temp);
// same processing for whatever is still held in rest once reading has finished
2280 vector<string> pieces = splitWhiteSpace(rest);
2282 for (int i = 0; i < pieces.size(); i++) {
2283 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2284 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2287 checkName(firstCol);
2288 checkName(secondCol);
2289 int num = getNumNames(secondCol);
2291 map<string, string>::iterator it = fastamap.find(firstCol);
2292 if (it == fastamap.end()) {
2294 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2296 seqPriorityNode temp(num, it->second, firstCol);
2297 nameVector.push_back(temp);
2306 catch(exception& e) {
2307 errorOut(e, "MothurOut", "readNames");
2311 //**********************************************************************************************************************
// Reads an accnos file (whitespace-separated names) into a set; each name goes through
// checkName() before it is inserted, and duplicates collapse because the container is a set.
2312 set<string> MothurOut::readAccnos(string accnosfile){
2316 openInputFile(accnosfile, in);
2323 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
2325 in.read(buffer, 4096);
2326 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2328 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
// whatever is still held in rest once reading has finished
2333 vector<string> pieces = splitWhiteSpace(rest);
2334 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2338 catch(exception& e) {
2339 errorOut(e, "MothurOut", "readAccnos");
2343 //**********************************************************************************************************************
// Reads an accnos file (whitespace-separated names) and appends every name, after checkName(),
// to names in file order. Duplicates are kept.
2344 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2348 openInputFile(accnosfile, in);
2355 if (control_pressed) { break; }
// the file is consumed in 4096-byte reads; rest carries a token cut in half by a read boundary
2357 in.read(buffer, 4096);
2358 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2360 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
// whatever is still held in rest once reading has finished
2365 vector<string> pieces = splitWhiteSpace(rest);
2366 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2371 catch(exception& e) {
2372 errorOut(e, "MothurOut", "readAccnos");
2376 /***********************************************************************/
// Scans names for ',' separators.
// Presumably returns the number of comma-separated names (0 for an empty string) - TODO confirm.
2378 int MothurOut::getNumNames(string names){
2384 for(int i=0;i<names.size();i++){
2385 if(names[i] == ','){
2393 catch(exception& e) {
2394 errorOut(e, "MothurOut", "getNumNames");
2398 /***********************************************************************/
// Walks every character of line.
// Presumably returns how many of them equal c - TODO confirm.
2400 int MothurOut::getNumChar(string line, char c){
2405 for(int i=0;i<line.size();i++){
2414 catch(exception& e) {
2415 errorOut(e, "MothurOut", "getNumChar");
2419 //**********************************************************************************************************************
2420 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2424 if (subset.size() > bigset.size()) { return false; }
2426 //check if each guy in suset is also in bigset
2427 for (int i = 0; i < subset.size(); i++) {
2429 for (int j = 0; j < bigset.size(); j++) {
2430 if (subset[i] == bigset[j]) { match = true; break; }
2433 //you have a guy in subset that had no match in bigset
2434 if (match == false) { return false; }
2440 catch(exception& e) {
2441 errorOut(e, "MothurOut", "isSubset");
2445 /***********************************************************************/
// Deletes filename (after resolving it with getFullPathName) using remove().
// The result of remove() is captured in error; the reporting that used it is commented out,
// so a failed delete (for example a file that does not exist) produces no message here.
2446 int MothurOut::mothurRemove(string filename){
2448 filename = getFullPathName(filename);
2449 int error = remove(filename.c_str());
2451 // if (errno != ENOENT) { //ENOENT == file does not exist
2452 // string message = "Error deleting file " + filename;
2453 // perror(message.c_str());
2458 catch(exception& e) {
2459 errorOut(e, "MothurOut", "mothurRemove");
2463 /***********************************************************************/
// Converts item to an int, accepting it only when isNumeric1(item) is true.
// Otherwise an [ERROR] message is written and commandInputsConvertError is set.
2464 bool MothurOut::mothurConvert(string item, int& num){
2468 if (isNumeric1(item)) {
2473 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2474 commandInputsConvertError = true;
2479 catch(exception& e) {
2480 errorOut(e, "MothurOut", "mothurConvert");
2484 /***********************************************************************/
// Converts item to an intDist, accepting it only when isNumeric1(item) is true.
// Otherwise an [ERROR] message is written and commandInputsConvertError is set.
2485 bool MothurOut::mothurConvert(string item, intDist& num){
2489 if (isNumeric1(item)) {
2494 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2495 commandInputsConvertError = true;
2500 catch(exception& e) {
2501 errorOut(e, "MothurOut", "mothurConvert");
2506 /***********************************************************************/
2507 bool MothurOut::isNumeric1(string stringToCheck){
2509 bool numeric = false;
2511 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2515 catch(exception& e) {
2516 errorOut(e, "MothurOut", "isNumeric1");
2521 /***********************************************************************/
// Converts item to a float, accepting it only when isNumeric1(item) is true.
// Otherwise an [ERROR] message is written and commandInputsConvertError is set.
2522 bool MothurOut::mothurConvert(string item, float& num){
2526 if (isNumeric1(item)) {
2531 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2532 commandInputsConvertError = true;
2537 catch(exception& e) {
2538 errorOut(e, "MothurOut", "mothurConvert");
2542 /***********************************************************************/
// Converts item to a double, accepting it only when isNumeric1(item) is true.
// Otherwise an [ERROR] message is written and commandInputsConvertError is set.
2543 bool MothurOut::mothurConvert(string item, double& num){
2547 if (isNumeric1(item)) {
2552 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2553 commandInputsConvertError = true;
2558 catch(exception& e) {
2559 errorOut(e, "MothurOut", "mothurConvert");
2563 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of binomial coefficients using the Pascal's-triangle
// recurrence binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j].
2565 vector<vector<double> > MothurOut::binomial(int maxOrder){
2567 vector<vector<double> > binomial(maxOrder+1);
2569 for(int i=0;i<=maxOrder;i++){
2570 binomial[i].resize(maxOrder+1);
2579 for(int i=2;i<=maxOrder;i++){
2583 for(int i=2;i<=maxOrder;i++){
2584 for(int j=1;j<=maxOrder;j++){
// NOTE(review): there is no "else" between the first two tests, so for i==j the value 1 is
// immediately overwritten by the recurrence in the final else branch
2585 if(i==j){ binomial[i][j]=1; }
2586 if(j>i) { binomial[i][j]=0; }
2587 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2594 catch(exception& e) {
2595 errorOut(e, "MothurOut", "binomial");
2599 /**************************************************************************************************/
// Decodes the text in base36 into an unsigned int by accumulating
// num = 36 * num + digit for each character, where digit comes from the converts table.
// NOTE(review): the contents of the converts table and the definition of c are not
// visible in this excerpt (presumably c is base36[i] - confirm).
2600 unsigned int MothurOut::fromBase36(string base36){
2602 unsigned int num = 0;
// lookup table from character to digit value, rebuilt on every call
2604 map<char, int> converts;
2669 while (i < base36.length()) {
// map::operator[] inserts and yields 0 for a character missing from the table;
// unsigned arithmetic wraps silently if the value exceeds the range of unsigned int
2671 num = 36 * num + converts[c];
2678 catch(exception& e) {
2679 errorOut(e, "MothurOut", "fromBase36");
2683 /***********************************************************************/
// Iterates i over 1..num inclusive; the loop performs no iterations when num < 1.
// NOTE(review): the loop body and the returned value are not visible in this excerpt.
// If this accumulates a product in an int, presumably it overflows for num > 12 - confirm.
2685 int MothurOut::factorial(int num){
2689 for (int i = 1; i <= num; i++) {
2695 catch(exception& e) {
2696 errorOut(e, "MothurOut", "factorial");
2700 /***********************************************************************/
// Counts the '>' characters between the current read position of file and end of file.
// Every '>' is counted, not only those that start a line, and the count is stored in an int.
// Reading through istreambuf_iterator consumes the stream up to end of file.
// NOTE(review): whether the stream is rewound afterwards is not visible in this excerpt.
2702 int MothurOut::getNumSeqs(ifstream& file){
2704 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
2708 catch(exception& e) {
2709 errorOut(e, "MothurOut", "getNumSeqs");
2713 /***********************************************************************/
// Reads file through this class's getline(file) helper and adds one to numSeqs for each
// non-empty line whose first character is '>'.
// NOTE(review): the enclosing loop and any reset of numSeqs are not visible in this
// excerpt - confirm whether the caller must zero numSeqs first.
2714 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
2719 input = getline(file);
// guard so that input[0] is only inspected on a non-empty line
2720 if (input.length() != 0) {
2721 if(input[0] == '>'){ numSeqs++; }
2725 catch(exception& e) {
2726 errorOut(e, "MothurOut", "getNumSeqs");
2730 /***********************************************************************/
2732 //This function parses the estimator options and puts them in a vector
// Splits estim at every occurrence of symbol and appends the pieces to container.
// When symbol is '-', the work is delegated to splitAtDash() (which also handles "\-" escapes).
// The text after the last separator is always appended, so an empty estim yields one empty piece.
// NOTE(review): the reset of individual after each push is not visible in this excerpt.
2733 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
2736 if (symbol == '-') { splitAtDash(estim, container); return; }
2738 string individual = "";
2739 int estimLength = estim.size();
2740 for(int i=0;i<estimLength;i++){
2741 if(estim[i] == symbol){
2742 container.push_back(individual);
2746 individual += estim[i];
// flush the trailing piece
2749 container.push_back(individual);
2752 catch(exception& e) {
2753 errorOut(e, "MothurOut", "splitAtChar");
2758 /***********************************************************************/
2760 //This function parses the estimator options and puts them in a vector
// Splits estim at '-' characters and appends the pieces to container.
// A dash preceded by a backslash ("\-") is treated as a literal dash: the backslash is
// dropped and the dash is kept in the current piece.
2761 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
2763 string individual = "";
2764 int estimLength = estim.size();
// true when the previous character was a backslash that escapes the dash at the current position
2765 bool prevEscape = false;
// an earlier implementation is retained below inside a block comment
2766 /*for(int i=0;i<estimLength;i++){
2768 individual += estim[i];
2772 if(estim[i] == '\\'){
2775 else if(estim[i] == '-'){
2776 container.push_back(individual);
2781 individual += estim[i];
// current implementation: look one character ahead to recognize "\-"
2788 for(int i=0;i<estimLength;i++){
2789 if(estim[i] == '-'){
2790 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2792 container.push_back(individual);
2795 }else if(estim[i] == '\\'){
// only look ahead when the backslash is not the last character
2796 if (i < estimLength-1) {
2797 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2798 else { individual += estim[i]; prevEscape = false; } //if no, add in
2799 }else { individual += estim[i]; }
2801 individual += estim[i];
// flush the trailing piece
2807 container.push_back(individual);
2809 catch(exception& e) {
2810 errorOut(e, "MothurOut", "splitAtDash");
2815 /***********************************************************************/
2816 //This function parses the label options and puts them in a set
// Splits estim at '-' characters and inserts the pieces into container.
// A dash preceded by a backslash ("\-") is kept as a literal dash and the backslash is dropped.
2817 void MothurOut::splitAtDash(string& estim, set<string>& container) {
2819 string individual = "";
2820 int estimLength = estim.size();
// true when the previous character was a backslash that escapes the dash at the current position
2821 bool prevEscape = false;
// NOTE(review): this first loop mirrors the block-commented legacy loop of the
// vector<string> overload; confirm whether it is commented out here as well.
2823 for(int i=0;i<estimLength;i++){
2825 individual += estim[i];
2829 if(estim[i] == '\\'){
2832 else if(estim[i] == '-'){
2833 container.insert(individual);
2838 individual += estim[i];
// look-ahead implementation: recognizes "\-" one character early
2845 for(int i=0;i<estimLength;i++){
2846 if(estim[i] == '-'){
2847 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2849 container.insert(individual);
2852 }else if(estim[i] == '\\'){
// only look ahead when the backslash is not the last character
2853 if (i < estimLength-1) {
2854 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2855 else { individual += estim[i]; prevEscape = false; } //if no, add in
2856 }else { individual += estim[i]; }
2858 individual += estim[i];
// flush the trailing piece
2861 container.insert(individual);
2864 catch(exception& e) {
2865 errorOut(e, "MothurOut", "splitAtDash");
2869 /***********************************************************************/
2870 //This function parses the line options and puts them in a set
// Splits estim at '-' characters, converts each piece with convert(), and inserts
// the resulting value (lineNum) into container.
// A dash preceded by a backslash ("\-") is kept as a literal dash and the backslash is dropped.
// NOTE(review): convert() is defined elsewhere; what it does with a piece that is not
// a number is not visible here.
2871 void MothurOut::splitAtDash(string& estim, set<int>& container) {
2873 string individual = "";
2875 int estimLength = estim.size();
// true when the previous character was a backslash that escapes the dash at the current position
2876 bool prevEscape = false;
// NOTE(review): this first loop mirrors the block-commented legacy loop of the
// vector<string> overload; confirm whether it is commented out here as well.
2878 for(int i=0;i<estimLength;i++){
2880 individual += estim[i];
2884 if(estim[i] == '\\'){
2887 else if(estim[i] == '-'){
2888 convert(individual, lineNum); //convert the string to int
2889 container.insert(lineNum);
2894 individual += estim[i];
// look-ahead implementation: recognizes "\-" one character early
2900 for(int i=0;i<estimLength;i++){
2901 if(estim[i] == '-'){
2902 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2904 convert(individual, lineNum); //convert the string to int
2905 container.insert(lineNum);
2908 }else if(estim[i] == '\\'){
// only look ahead when the backslash is not the last character
2909 if (i < estimLength-1) {
2910 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2911 else { individual += estim[i]; prevEscape = false; } //if no, add in
2912 }else { individual += estim[i]; }
2914 individual += estim[i];
// convert and store the trailing piece
2918 convert(individual, lineNum); //convert the string to int
2919 container.insert(lineNum);
2921 catch(exception& e) {
2922 errorOut(e, "MothurOut", "splitAtDash");
2927 /***********************************************************************/
2928 string MothurOut::makeList(vector<string>& names) {
2932 if (names.size() == 0) { return list; }
2934 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
2937 list += names[names.size()-1];
2941 catch(exception& e) {
2942 errorOut(e, "MothurOut", "makeList");
2947 /***********************************************************************/
2948 //This function parses a string and puts the pieces in a vector
// Splits estim at every ',' and appends the pieces to container.
// The text after the last comma is always appended, so an empty estim yields one empty piece.
// NOTE(review): the reset of individual after each push is not visible in this excerpt.
2949 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
2951 string individual = "";
2952 int estimLength = estim.size();
2953 for(int i=0;i<estimLength;i++){
2954 if(estim[i] == ','){
2955 container.push_back(individual);
2959 individual += estim[i];
// flush the trailing piece
2962 container.push_back(individual);
// earlier substr-based implementation, kept commented out
2967 // string individual;
2969 // while (estim.find_first_of(',') != -1) {
2970 // individual = estim.substr(0,estim.find_first_of(','));
2971 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
2972 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
2973 // container.push_back(individual);
2977 // container.push_back(estim);
2979 catch(exception& e) {
2980 errorOut(e, "MothurOut", "splitAtComma");
2984 /***********************************************************************/
2985 //This function splits up the various option parameters
2986 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
2988 prefix = suffix.substr(0,suffix.find_first_of(c));
2989 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
2990 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
2992 while(suffix.at(0) == ' ')
2993 suffix = suffix.substr(1, suffix.length());
2994 }else { suffix = ""; }
2997 catch(exception& e) {
2998 errorOut(e, "MothurOut", "splitAtChar");
3003 /***********************************************************************/
3005 //This function splits up the various option parameters
3006 void MothurOut::splitAtComma(string& prefix, string& suffix){
3008 prefix = suffix.substr(0,suffix.find_first_of(','));
3009 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3010 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3012 while(suffix.at(0) == ' ')
3013 suffix = suffix.substr(1, suffix.length());
3014 }else { suffix = ""; }
3017 catch(exception& e) {
3018 errorOut(e, "MothurOut", "splitAtComma");
3022 /***********************************************************************/
3024 //This function separates the key value from the option value i.e. dist=96_...
// When value contains '=', key receives the text before the first '=' and value is
// replaced by the text after it (an empty string when '=' is the last character).
// NOTE(review): what happens when value contains no '=' is not visible in this excerpt.
3025 void MothurOut::splitAtEquals(string& key, string& value){
// find_first_of returns string::npos when absent; the comparison with -1 relies on the
// int being converted to that same unsigned value
3027 if(value.find_first_of('=') != -1){
3028 key = value.substr(0,value.find_first_of('='));
3029 if ((value.find_first_of('=')+1) <= value.length()) {
3030 value = value.substr(value.find_first_of('=')+1, value.length());
3037 catch(exception& e) {
3038 errorOut(e, "MothurOut", "splitAtEquals");
3043 /**************************************************************************************************/
3045 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3047 for (int i = 0; i < Groups.size(); i++) {
3048 if (groupname == Groups[i]) { return true; }
3052 catch(exception& e) {
3053 errorOut(e, "MothurOut", "inUsersGroups");
3057 /**************************************************************************************************/
3059 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3061 for (int i = 0; i < sets.size(); i++) {
3062 if (set == sets[i]) { return true; }
3066 catch(exception& e) {
3067 errorOut(e, "MothurOut", "inUsersGroups");
3071 /**************************************************************************************************/
3073 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3075 for (int i = 0; i < Groups.size(); i++) {
3076 if (groupname == Groups[i]) { return true; }
3080 catch(exception& e) {
3081 errorOut(e, "MothurOut", "inUsersGroups");
3086 /**************************************************************************************************/
3087 //returns true if any of the strings in first vector are in second vector
3088 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3091 for (int i = 0; i < groupnames.size(); i++) {
3092 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3096 catch(exception& e) {
3097 errorOut(e, "MothurOut", "inUsersGroups");
3101 /***********************************************************************/
3102 //this function determines if the user has given us labels that are smaller than the given label.
3103 //if so then it returns true so that the calling function can run the previous valid distance.
3104 //it's a "smart" distance function. It also checks for invalid labels.
// Parameters:
//   label      - the label being compared against; "unique" short-circuits to false
//   userLabels - in/out: labels requested by the user; invalid labels and labels smaller than
//                label are erased from this set
//   errorOff   - messages are printed only when this is the empty string
// NOTE(review): where smaller is set and what is returned at the end are not visible in
// this excerpt.
3105 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3108 set<string>::iterator it;
// numeric form of every valid user label, sorted further below
3109 vector<float> orderFloat;
3110 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3111 map<string, float>::iterator it2;
3113 bool smaller = false;
3115 //unique is the smallest line
3116 if (label == "unique") { return false; }
3118 if (convertTestFloat(label, labelFloat)) {
3119 convert(label, labelFloat);
3120 }else { //cant convert
3125 //go through users set and make them floats
// the loop header has no increment: the iterator is advanced inside the body
3126 for(it = userLabels.begin(); it != userLabels.end();) {
3129 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3131 orderFloat.push_back(temp);
3132 userMap[*it] = temp;
// "unique" is represented by -1.0 so that it sorts before every numeric label
3134 }else if (*it == "unique") {
3135 orderFloat.push_back(-1.0);
3136 userMap["unique"] = -1.0;
// presumably the final else branch: the label is neither "unique" nor convertible - TODO confirm
3139 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
// post-increment hands erase() the current position while it moves on to the next element
3140 userLabels.erase(it++);
3145 sort(orderFloat.begin(), orderFloat.end());
3147 /*************************************************/
3148 //is this label bigger than any of the users labels
3149 /*************************************************/
3151 //loop through order until you find a label greater than label
3152 for (int i = 0; i < orderFloat.size(); i++) {
3153 if (orderFloat[i] < labelFloat) {
// -1 is the stand-in stored for "unique" above
3155 if (orderFloat[i] == -1) {
3156 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3157 userLabels.erase("unique");
3160 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// find the user's original spelling of this value so that exactly that string can be erased
3162 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3163 if (it2->second == orderFloat[i]) {
3165 //remove small labels
3166 userLabels.erase(s);
3170 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3172 //since they are sorted once you find a bigger one stop looking
3179 catch(exception& e) {
3180 errorOut(e, "MothurOut", "anyLabelsToProcess");
3185 /**************************************************************************************************/
// Compares the version recorded on the first line of file with version.
// The first line is expected to start with '#' followed by a dot-separated version; both
// versions are split at '.' and compared component by component.
// When the check fails the file is closed; otherwise the stream is rewound with seekg(0).
// NOTE(review): the initial value of good and the returned value are not visible in this
// excerpt (presumably good is returned - confirm).
3186 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3191 string line = getline(file);
3193 //before we added this check
3194 if (line[0] != '#') { good = false; }
// drop the leading '#'
3197 line = line.substr(1);
3199 vector<string> versionVector;
3200 splitAtChar(version, versionVector, '.');
3202 //check file version
3203 vector<string> linesVector;
3204 splitAtChar(line, linesVector, '.');
// a different number of components is treated as a mismatch
3206 if (versionVector.size() != linesVector.size()) { good = false; }
3208 for (int j = 0; j < versionVector.size(); j++) {
3210 convert(versionVector[j], num1);
3211 convert(linesVector[j], num2);
3213 //if mothurs version is newer than this files version, then we want to remake it
// NOTE(review): only num1 > num2 ends the comparison in the visible code; confirm that a
// component where the file is newer (num1 < num2) also stops the loop, otherwise a later
// component can still flag the file as outdated.
3214 if (num1 > num2) { good = false; break; }
3220 if (!good) { file.close(); }
3221 else { file.seekg(0); }
3225 catch(exception& e) {
3226 errorOut(e, "MothurOut", "checkReleaseVersion");
3230 /**************************************************************************************************/
3231 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3233 vector<double> averages; //averages.resize(numComp, 0.0);
3234 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3236 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3237 for (int i = 0; i < dists[thisIter].size(); i++) {
3238 averages[i] += dists[thisIter][i];
3243 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3247 catch(exception& e) {
3248 errorOut(e, "MothurOut", "getAverages");
3252 /**************************************************************************************************/
3253 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3256 vector<double> averages = getAverages(dists);
3258 //find standard deviation
3259 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3260 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3262 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3263 for (int j = 0; j < dists[thisIter].size(); j++) {
3264 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3267 for (int i = 0; i < stdDev.size(); i++) {
3268 stdDev[i] /= (double) dists.size();
3269 stdDev[i] = sqrt(stdDev[i]);
3274 catch(exception& e) {
3275 errorOut(e, "MothurOut", "getAverages");
3279 /**************************************************************************************************/
3280 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3282 //find standard deviation
3283 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3284 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3286 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3287 for (int j = 0; j < dists[thisIter].size(); j++) {
3288 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3291 for (int i = 0; i < stdDev.size(); i++) {
3292 stdDev[i] /= (double) dists.size();
3293 stdDev[i] = sqrt(stdDev[i]);
3298 catch(exception& e) {
3299 errorOut(e, "MothurOut", "getAverages");
3303 /**************************************************************************************************/
3304 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3307 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3308 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3309 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3310 vector<seqDist> temp;
3311 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3313 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3314 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3315 tempDist.dist = 0.0;
3316 temp.push_back(tempDist);
3318 calcAverages.push_back(temp);
3321 if (mode == "average") {
3322 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3323 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3324 for (int j = 0; j < calcAverages[i].size(); j++) {
3325 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3330 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3331 for (int j = 0; j < calcAverages[i].size(); j++) {
3332 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3335 }else { //find median
3336 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3337 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3338 vector<double> dists;
3339 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3340 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3342 sort(dists.begin(), dists.end());
3343 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3348 return calcAverages;
3350 catch(exception& e) {
3351 errorOut(e, "MothurOut", "getAverages");
3355 /**************************************************************************************************/
3356 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3359 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3360 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3361 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3362 vector<seqDist> temp;
3363 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3365 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3366 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3367 tempDist.dist = 0.0;
3368 temp.push_back(tempDist);
3370 calcAverages.push_back(temp);
3374 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3375 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3376 for (int j = 0; j < calcAverages[i].size(); j++) {
3377 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3382 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3383 for (int j = 0; j < calcAverages[i].size(); j++) {
3384 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3388 return calcAverages;
3390 catch(exception& e) {
3391 errorOut(e, "MothurOut", "getAverages");
3395 /**************************************************************************************************/
3396 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3399 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3401 //find standard deviation
3402 vector< vector<seqDist> > stdDev;
3403 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3404 vector<seqDist> temp;
3405 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3407 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3408 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3409 tempDist.dist = 0.0;
3410 temp.push_back(tempDist);
3412 stdDev.push_back(temp);
3415 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3416 for (int i = 0; i < stdDev.size(); i++) {
3417 for (int j = 0; j < stdDev[i].size(); j++) {
3418 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3423 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3424 for (int j = 0; j < stdDev[i].size(); j++) {
3425 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3426 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3432 catch(exception& e) {
3433 errorOut(e, "MothurOut", "getAverages");
3437 /**************************************************************************************************/
3438 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3440 //find standard deviation
3441 vector< vector<seqDist> > stdDev;
3442 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3443 vector<seqDist> temp;
3444 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3446 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3447 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3448 tempDist.dist = 0.0;
3449 temp.push_back(tempDist);
3451 stdDev.push_back(temp);
3454 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3455 for (int i = 0; i < stdDev.size(); i++) {
3456 for (int j = 0; j < stdDev[i].size(); j++) {
3457 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3462 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3463 for (int j = 0; j < stdDev[i].size(); j++) {
3464 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3465 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3471 catch(exception& e) {
3472 errorOut(e, "MothurOut", "getAverages");
3477 /**************************************************************************************************/
3478 bool MothurOut::isContainingOnlyDigits(string input) {
3481 //are you a digit in ascii code
3482 for (int i = 0;i < input.length(); i++){
3483 if( input[i]>47 && input[i]<58){}
3484 else { return false; }
3489 catch(exception& e) {
3490 errorOut(e, "MothurOut", "isContainingOnlyDigits");
3494 /**************************************************************************************************/
// Walks the ';'-separated taxon entries of tax one at a time. For each entry, the text
// between the last '(' and the last ')' is treated as a confidence score and removed from
// the taxon when isNumeric1() accepts it.
// Returns 0 early when control_pressed is set.
// NOTE(review): how the cleaned taxa are reassembled into tax and the normal return
// value are not visible in this excerpt.
3495 int MothurOut::removeConfidences(string& tax) {
// find_first_of returns string::npos when absent; the comparison with -1 relies on the
// int being converted to that same unsigned value
3501 while (tax.find_first_of(';') != -1) {
3503 if (control_pressed) { return 0; }
// next taxon: everything up to the first ';'
3506 taxon = tax.substr(0,tax.find_first_of(';'));
3508 int pos = taxon.find_last_of('(');
3511 int pos2 = taxon.find_last_of(')');
// text strictly between the parentheses
3513 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
3514 if (isNumeric1(confidenceScore)) {
3515 taxon = taxon.substr(0, pos); //rip off confidence
// drop the processed taxon and its ';' from the front of tax
3521 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
3529 catch(exception& e) {
3530 errorOut(e, "MothurOut", "removeConfidences");
3534 /**************************************************************************************************/
3535 string MothurOut::removeQuotes(string tax) {
3541 for (int i = 0; i < tax.length(); i++) {
3543 if (control_pressed) { return newTax; }
3545 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
3551 catch(exception& e) {
3552 errorOut(e, "MothurOut", "removeQuotes");
3556 /**************************************************************************************************/
3557 // function for calculating standard deviation
// Computes the population standard deviation of featureVector: the mean is taken first,
// then the squared deviations are summed, divided by the number of elements (not by
// elements-1), and the square root is applied.
// NOTE(review): the declarations of average and stdDev and the returned value are not
// visible in this excerpt. An empty featureVector makes both divisions divide by zero.
3558 double MothurOut::getStandardDeviation(vector<int>& featureVector){
// accumulate the sum, then turn it into the mean
3562 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
3563 average /= (double) featureVector.size();
3565 //find standard deviation
3567 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
3568 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
3571 stdDev /= (double) featureVector.size();
3572 stdDev = sqrt(stdDev);
3576 catch(exception& e) {
3577 errorOut(e, "MothurOut", "getStandardDeviation");
3581 /**************************************************************************************************/