5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Collects the names of the "current" settings into the set `types`: the file-type tags
// listed below plus "processors". Exceptions are reported through errorOut.
// NOTE(review): the declaration of `types`, the return statement and some insert lines are
// not visible in this excerpt - presumably `types` is what gets returned; confirm.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
// "processors" is not a file type, but it is tracked alongside the current files
48 types.insert("processors");
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
94 bool MothurOut::hasCurrentFiles() {
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): the body of this function is not visible in this excerpt; judging by the name
// it presumably resets the "current" settings - confirm against the full file.
// Exceptions are reported through errorOut.
133 void MothurOut::clearCurrentFiles() {
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
166 string MothurOut::findProgramPath(string programName){
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
195 //is this mothurs path?
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Remembers filename in logFileName and opens it on the member stream `out` through openOutputFile.
// NOTE(review): the preprocessor guards around the MPI rank check are not visible in this
// excerpt, so it cannot be told from here whether the open happens only for rank 0.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
// Stores pathname in outputDir exactly as given (no separator is appended and no check is made here).
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// Visible here: the MPI rank is read into pid and the guarded section runs only for rank 0.
// NOTE(review): the statements inside the guard are not visible in this excerpt - presumably
// they close the log stream; confirm against the full file.
294 void MothurOut::closeLog() {
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. NOTE(review): the try-body is not visible in this excerpt.
// Failures are reported through errorOut under the function name "MothurOut".
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Visible here: the MPI rank is read into pid and the guarded section runs only for rank 0.
// NOTE(review): the statements that write `output` are not visible in this excerpt.
// NOTE(review): errorOut is given "MothurOut" as the function name, a label shared by several
// functions in this file, so the error text will not identify this function.
328 void MothurOut::mothurOut(string output) {
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Visible here: the MPI rank is read into pid and the guarded section runs only for rank 0.
// NOTE(review): the statements that write `output` are not visible in this excerpt; by its name
// this variant presumably writes to the screen only - confirm.
// NOTE(review): errorOut is given "MothurOut" rather than this function's own name.
351 void MothurOut::mothurOutJustToScreen(string output) {
356 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
358 if (pid == 0) { //only one process should output to screen
366 catch(exception& e) {
367 errorOut(e, "MothurOut", "MothurOut");
371 /*********************************************************************************************/
// Visible here: the MPI rank is read into pid and the guarded section runs only for rank 0.
// NOTE(review): the statements that end the line are not visible in this excerpt.
372 void MothurOut::mothurOutEndLine() {
376 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
378 if (pid == 0) { //only one process should output to screen
388 catch(exception& e) {
389 errorOut(e, "MothurOut", "MothurOutEndLine");
393 /*********************************************************************************************/
// Overload that also streams `output` into the caller-supplied outputFile.
// Visible here: the MPI rank is read into pid and the guarded section runs only for rank 0.
// NOTE(review): the remaining statements and the preprocessor guards are not visible in this excerpt.
// NOTE(review): errorOut is given "MothurOut" as the function name, a label shared by several functions.
394 void MothurOut::mothurOut(string output, ofstream& outputFile) {
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
406 outputFile << output;
414 catch(exception& e) {
415 errorOut(e, "MothurOut", "MothurOut");
419 /*********************************************************************************************/
// Overload taking a caller-supplied outputFile.
// Visible here: the MPI rank is read into pid and the guarded section runs only for rank 0.
// NOTE(review): the statements that end the line are not visible in this excerpt.
420 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
424 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
426 if (pid == 0) { //only one process should output to screen
437 catch(exception& e) {
438 errorOut(e, "MothurOut", "MothurOutEndLine");
442 /*********************************************************************************************/
// Visible here: the MPI rank is read into pid and the guarded section runs only for rank 0.
// NOTE(review): the statements that write `output` are not visible in this excerpt; by its name
// this variant presumably writes to the log file only - confirm.
443 void MothurOut::mothurOutJustToLog(string output) {
447 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
449 if (pid == 0) { //only one process should output to screen
458 catch(exception& e) {
459 errorOut(e, "MothurOut", "MothurOutJustToLog");
463 /*********************************************************************************************/
// Reports exception e to the user: prints "[ERROR]: " followed by e.what(), then a follow-up
// message naming the class (`object`) and `function` in which it was caught. The follow-up
// text depends on whether the exception text contains "bad_alloc" (out-of-memory).
464 void MothurOut::errorOut(exception& e, string object, string function) {
466 //mem_usage(vm, rss);
468 string errorType = toString(e.what());
// NOTE(review): find() returns a size_t; storing it in an int relies on -1 converting back to
// string::npos in the comparison below
470 int pos = errorType.find("bad_alloc");
471 mothurOut("[ERROR]: ");
472 mothurOut(errorType);
474 if (pos == string::npos) { //not bad_alloc
475 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
478 if (object == "cluster"){
479 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
480 }else if (object == "shhh.flows"){
481 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
483 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
487 /*********************************************************************************************/
488 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
489 // process_mem_usage(double &, double &) - takes two doubles by reference,
490 // attempts to read the system-dependent data for a process' virtual memory
491 // size and resident set size, and return the results in KB.
493 // On failure, returns 0.0, 0.0
// Reads the whitespace-separated fields of /proc/self/stat, skips the first 22 and keeps the
// next two (vsize and rss). Both are converted to KB, stored in vm_usage / resident_set and
// logged through mothurOut.
// NOTE(review): the declarations of vsize and rss and the return statements are not visible here.
// NOTE(review): the platform guard also matches __APPLE__, where /proc/self/stat presumably does
// not exist - confirm what the function yields there.
494 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
495 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
500 // 'file' stat seems to give the most reliable results
502 ifstream stat_stream("/proc/self/stat",ios_base::in);
504 // dummy vars for leading entries in stat that we don't care about
506 string pid, comm, state, ppid, pgrp, session, tty_nr;
507 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
508 string utime, stime, cutime, cstime, priority, nice;
509 string O, itrealvalue, starttime;
511 // the two fields we want
516 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
517 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
518 >> utime >> stime >> cutime >> cstime >> priority >> nice
519 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
521 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
// vsize is divided by 1024 to get KB; rss is a page count, so it is scaled by the page size in KB
522 vm_usage = vsize / 1024.0;
523 resident_set = rss * page_size_kb;
525 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
529 /* //windows memory usage
530 // Get the list of process identifiers.
531 DWORD aProcesses[1024], cbNeeded, cProcesses;
533 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
535 // Calculate how many process identifiers were returned.
536 cProcesses = cbNeeded / sizeof(DWORD);
538 // Print the memory usage for each process
539 for (int i = 0; i < cProcesses; i++ ) {
540 DWORD processID = aProcesses[i];
542 PROCESS_MEMORY_COUNTERS pmc;
544 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
546 // Print the process identifier.
547 printf( "\nProcess ID: %u\n", processID);
549 if (NULL != hProcess) {
551 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
552 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
553 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
554 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
555 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
556 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
557 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
558 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
559 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
560 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
562 CloseHandle(hProcess);
572 /***********************************************************************/
// Opens fileName (after expansion by getFullPathName) on fileHandle in append mode and logs
// "[ERROR]: Could not open <name>" when that fails.
// NOTE(review): the open-failure test and the return statements are not visible in this excerpt;
// callers in this file treat 0 from the open helpers as success - confirm the values here.
573 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
575 fileName = getFullPathName(fileName);
577 fileHandle.open(fileName.c_str(), ios::app);
579 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
586 catch(exception& e) {
587 errorOut(e, "MothurOut", "openOutputFileAppend");
591 /***********************************************************************/
592 void MothurOut::gobble(istream& f){
596 while(isspace(d=f.get())) { ;}
597 if(!f.eof()) { f.putback(d); }
599 catch(exception& e) {
600 errorOut(e, "MothurOut", "gobble");
604 /***********************************************************************/
605 void MothurOut::gobble(istringstream& f){
608 while(isspace(d=f.get())) {;}
609 if(!f.eof()) { f.putback(d); }
611 catch(exception& e) {
612 errorOut(e, "MothurOut", "gobble");
617 /***********************************************************************/
619 string MothurOut::getline(istringstream& fileHandle) {
624 while (!fileHandle.eof()) {
626 char c = fileHandle.get();
628 //are you at the end of the line
629 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
636 catch(exception& e) {
637 errorOut(e, "MothurOut", "getline");
641 /***********************************************************************/
643 string MothurOut::getline(ifstream& fileHandle) {
650 char c = fileHandle.get();
652 //are you at the end of the line
653 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
660 catch(exception& e) {
661 errorOut(e, "MothurOut", "getline");
665 /***********************************************************************/
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
668 #ifdef USE_COMPRESSION
// True when s finishes with the characters of suffix; an empty suffix always matches.
inline bool endsWith(std::string s, const char * suffix){
	const size_t suffixLength = strlen(suffix);
	if (s.size() < suffixLength) { return false; }

	// compare the tail of s in place against suffix
	return s.compare(s.size() - suffixLength, suffixLength, suffix) == 0;
}
676 string MothurOut::getRootName(string longName){
679 string rootName = longName;
681 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
682 #ifdef USE_COMPRESSION
683 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
684 int pos = rootName.find_last_of('.');
685 rootName = rootName.substr(0, pos);
686 cerr << "shortening " << longName << " to " << rootName << "\n";
690 if(rootName.find_last_of(".") != rootName.npos){
691 int pos = rootName.find_last_of('.')+1;
692 rootName = rootName.substr(0, pos);
697 catch(exception& e) {
698 errorOut(e, "MothurOut", "getRootName");
702 /***********************************************************************/
704 string MothurOut::getSimpleName(string longName){
706 string simpleName = longName;
709 found=longName.find_last_of("/\\");
711 if(found != longName.npos){
712 simpleName = longName.substr(found+1);
717 catch(exception& e) {
718 errorOut(e, "MothurOut", "getSimpleName");
723 /***********************************************************************/
// Draws an index from rand(): rand() is scaled by (highest+1)/(RAND_MAX+1) and truncated to an
// int, which gives a value between 0 and highest.
// NOTE(review): the operands are cast to float first, so for large values of highest not every
// index may be reachable; switching to double would change the sequence produced for a given
// seed, so the expression is left as is.
// NOTE(review): the return statement is not visible here - presumably `random` is returned.
725 int MothurOut::getRandomIndex(int highest){
728 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
732 catch(exception& e) {
733 errorOut(e, "MothurOut", "getRandomIndex");
738 /**********************************************************************/
740 string MothurOut::getPathName(string longName){
742 string rootPathName = longName;
744 if(longName.find_last_of("/\\") != longName.npos){
745 int pos = longName.find_last_of("/\\")+1;
746 rootPathName = longName.substr(0, pos);
751 catch(exception& e) {
752 errorOut(e, "MothurOut", "getPathName");
757 /***********************************************************************/
// Normalizes dirName in place (adds a trailing separator, then expands it with getFullPathName)
// and probes it by opening a scratch file named <dirName><tag>"temp" for writing. A message is
// logged when the directory does not exist or is not writable; the scratch file is removed
// with mothurRemove.
// NOTE(review): the declarations of `tag` and `out`, the open-failure test and the return
// statements are not visible in this excerpt.
759 bool MothurOut::dirCheck(string& dirName){
765 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
770 //add / to name if needed
// NOTE(review): for an empty dirName, length()-1 wraps around and substr() throws out_of_range
771 string lastChar = dirName.substr(dirName.length()-1);
772 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
773 if (lastChar != "/") { dirName += "/"; }
775 if (lastChar != "\\") { dirName += "\\"; }
778 //test to make sure directory exists
779 dirName = getFullPathName(dirName);
780 string outTemp = dirName + tag + "temp";
782 out.open(outTemp.c_str(), ios::trunc);
784 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
787 mothurRemove(outTemp);
793 catch(exception& e) {
794 errorOut(e, "MothurOut", "dirCheck");
799 //**********************************************************************************************************************
801 map<string, vector<string> > MothurOut::parseClasses(string classes){
803 map<string, vector<string> > parts;
805 //treatment<Early|Late>-age<young|old>
806 vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
808 for (int i = 0; i < pieces.size(); i++) {
809 string category = ""; string value = "";
810 bool foundOpen = false;
811 for (int j = 0; j < pieces[i].length(); j++) {
812 if (control_pressed) { return parts; }
814 if (pieces[i][j] == '<') { foundOpen = true; }
815 else if (pieces[i][j] == '>') { j += pieces[i].length(); }
817 if (!foundOpen) { category += pieces[i][j]; }
818 else { value += pieces[i][j]; }
821 vector<string> values; splitAtChar(value, values, '|');
822 parts[category] = values;
827 catch(exception& e) {
828 errorOut(e, "MothurOut", "parseClasses");
832 /***********************************************************************/
834 string MothurOut::hasPath(string longName){
839 found=longName.find_last_of("~/\\");
841 if(found != longName.npos){
842 path = longName.substr(0, found+1);
847 catch(exception& e) {
848 errorOut(e, "MothurOut", "hasPath");
853 /***********************************************************************/
855 string MothurOut::getExtension(string longName){
857 string extension = "";
859 if(longName.find_last_of('.') != longName.npos){
860 int pos = longName.find_last_of('.');
861 extension = longName.substr(pos, longName.length());
866 catch(exception& e) {
867 errorOut(e, "MothurOut", "getExtension");
871 /***********************************************************************/
// Opens fileName (after expansion by getFullPathName) and returns true when the stream is
// already at end-of-file at the check below. An "[ERROR]: Could not open <name>" message is
// logged on open failure.
// NOTE(review): the stream declaration, the open-failure test, whatever precedes the eof()
// check (presumably a whitespace skip) and the remaining return statements are not visible here.
872 bool MothurOut::isBlank(string fileName){
875 fileName = getFullPathName(fileName);
878 fileHandle.open(fileName.c_str());
880 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
883 //check for blank file
885 if (fileHandle.eof()) { fileHandle.close(); return true; }
890 catch(exception& e) {
891 errorOut(e, "MothurOut", "isBlank");
895 /***********************************************************************/
// Expands fileName into a complete path.
//  - When hasPath() finds no path part, fileName is returned unchanged.
//  - A '~' in the path part is replaced with the HOME (unix-like) or HOMEPATH (other) directory.
//  - Otherwise, when the path part contains "./" (".\" on the non-unix branch), the relative
//    components are resolved against the current working directory; a path part without
//    them is treated as already complete and fileName is returned unchanged.
// NOTE(review): several declarations, else-branches and the final return are not visible in this excerpt.
897 string MothurOut::getFullPathName(string fileName){
900 string path = hasPath(fileName);
904 if (path == "") { return fileName; } //its a simple name
905 else { //we need to complete the pathname
906 // ex. ../../../filename
907 // cwd = /user/work/desktop
910 //get current working directory
911 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// find() returns string::npos when '~' is absent; the comparison with -1 relies on -1 converting to npos
913 if (path.find("~") != -1) { //go to home directory
916 char *homepath = NULL;
917 homepath = getenv ("HOME");
918 if ( homepath != NULL) { homeDir = homepath; }
919 else { homeDir = ""; }
// everything after the '~' is appended to the home directory
921 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
924 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
925 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
927 //char* cwdpath = new char[1024];
929 //cwdpath=getcwd(cwdpath,size);
932 char *cwdpath = NULL;
// NOTE(review): getcwd(NULL, 0) hands back a buffer allocated by the library; no matching free() is
// visible in this excerpt - confirm it is released
933 cwdpath = getcwd(NULL, 0); // or _getcwd
934 if ( cwdpath != NULL) { cwd = cwdpath; }
// drop the first character of cwd (the leading '/' of an absolute path) before splitting
940 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
942 //break apart the current working directory
944 while (simpleCWD.find_first_of('/') != string::npos) {
945 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
946 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
949 //get last one // ex. ../../../filename = /user/work/desktop/filename
950 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index is the deepest cwd component that is kept in the result
953 int index = dirs.size()-1;
// consume the "./" and "../" components of the path part from right to left
955 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
956 if (pos == 0) { break; //you are at the end
957 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
958 path = path.substr(0, pos-1);
960 if (index == 0) { break; }
961 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
962 path = path.substr(0, pos);
963 }else if (pos == 1) { break; //you are at the end
964 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// prepend the remaining cwd components, deepest first, then the root '/'
967 for (int i = index; i >= 0; i--) {
968 newFileName = dirs[i] + "/" + newFileName;
971 newFileName = "/" + newFileName;
975 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): unlike the unix-like branch, getenv's result is not checked for NULL before
// being assigned to a std::string
976 string homeDir = getenv ("HOMEPATH");
977 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
980 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
981 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
983 char *cwdpath = NULL;
984 cwdpath = getcwd(NULL, 0); // or _getcwd
985 if ( cwdpath != NULL) { cwd = cwdpath; }
988 //break apart the current working directory
990 while (cwd.find_first_of('\\') != -1) {
991 string dir = cwd.substr(0,cwd.find_first_of('\\'));
992 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
997 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
999 int index = dirs.size()-1;
// same right-to-left resolution as the unix-like branch, with '\' as the separator
1001 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
1002 if (pos == 0) { break; //you are at the end
1003 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
1004 path = path.substr(0, pos-1);
1006 if (index == 0) { break; }
1007 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
1008 path = path.substr(0, pos);
1009 }else if (pos == 1) { break; //you are at the end
1010 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
1013 for (int i = index; i >= 0; i--) {
1014 newFileName = dirs[i] + "\\" + newFileName;
1023 catch(exception& e) {
1024 errorOut(e, "MothurOut", "getFullPathName");
1028 /***********************************************************************/
// Quiet variant of openInputFile: opens fileName (after expansion by getFullPathName) on
// fileHandle; the "Could not open" message is commented out in this overload. The parameter m
// is not used in the visible lines.
// On unix-like builds with USE_COMPRESSION, a ".gz"/".bz2" file is read through a named pipe
// that a forked child fills by running zcat/bzcat.
// NOTE(review): the open-failure test and the return statements are not visible in this excerpt.
1030 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1032 //get full path name
1033 string completeFileName = getFullPathName(fileName);
1034 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1035 #ifdef USE_COMPRESSION
1036 // check for gzipped or bzipped file
1037 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only produces a name; another process could create that path before mkfifo does
1038 string tempName = string(tmpnam(0));
1039 mkfifo(tempName.c_str(), 0666);
1040 int fork_result = fork();
1041 if (fork_result < 0) {
1042 cerr << "Error forking.\n";
1044 } else if (fork_result == 0) {
// child process: decompress into the pipe, then remove the pipe
// NOTE(review): the file name is placed in the shell command unquoted
1045 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1046 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1047 system(command.c_str());
1048 cerr << "Done decompressing " << completeFileName << "\n";
1049 mothurRemove(tempName);
// parent process: from here on, read from the pipe instead of the compressed file
1052 cerr << "waiting on child process " << fork_result << "\n";
1053 completeFileName = tempName;
1058 fileHandle.open(completeFileName.c_str());
1060 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1063 //check for blank file
1068 catch(exception& e) {
1069 errorOut(e, "MothurOut", "openInputFile - no Error");
1073 /***********************************************************************/
// Opens fileName (after expansion by getFullPathName) on fileHandle, logging
// "[ERROR]: Could not open <name>" on failure and "[ERROR]: <name> is blank. Please correct."
// when the stream is already at end-of-file at the blank-file check.
// On unix-like builds with USE_COMPRESSION, a ".gz"/".bz2" file is read through a named pipe
// that a forked child fills by running zcat/bzcat.
// NOTE(review): the open-failure test and the return statements are not visible in this excerpt.
1075 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1078 //get full path name
1079 string completeFileName = getFullPathName(fileName);
1080 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1081 #ifdef USE_COMPRESSION
1082 // check for gzipped or bzipped file
1083 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only produces a name; another process could create that path before mkfifo does
1084 string tempName = string(tmpnam(0));
1085 mkfifo(tempName.c_str(), 0666);
1086 int fork_result = fork();
1087 if (fork_result < 0) {
1088 cerr << "Error forking.\n";
1090 } else if (fork_result == 0) {
// child process: decompress into the pipe, then remove the pipe
// NOTE(review): the file name is placed in the shell command unquoted
1091 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1092 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1093 system(command.c_str());
1094 cerr << "Done decompressing " << completeFileName << "\n";
1095 mothurRemove(tempName);
// parent process: from here on, read from the pipe instead of the compressed file
1098 cerr << "waiting on child process " << fork_result << "\n";
1099 completeFileName = tempName;
1105 fileHandle.open(completeFileName.c_str());
1107 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1111 //check for blank file
1113 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1118 catch(exception& e) {
1119 errorOut(e, "MothurOut", "openInputFile");
1123 /***********************************************************************/
// Moves oldName to newName, replacing an existing newName. Identical names are a no-op that
// returns 0. On unix-like systems the work is done by the shell commands "rm" and "mv";
// elsewhere by mothurRemove followed by rename().
// NOTE(review): both names are inserted into the shell commands unquoted, so names containing
// spaces or shell metacharacters will not be handled as intended.
// NOTE(review): the results of system() and rename() are not checked in the visible lines.
1125 int MothurOut::renameFile(string oldName, string newName){
1128 if (oldName == newName) { return 0; }
// probe whether the target already exists by trying to open it quietly
1131 int exist = openInputFile(newName, inTest, "");
1134 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1135 if (exist == 0) { //you could open it so you want to delete it
1136 string command = "rm " + newName;
1137 system(command.c_str());
1140 string command = "mv " + oldName + " " + newName;
1141 system(command.c_str());
1143 mothurRemove(newName);
1144 int renameOk = rename(oldName.c_str(), newName.c_str());
1149 catch(exception& e) {
1150 errorOut(e, "MothurOut", "renameFile");
1155 /***********************************************************************/
// Opens fileName (after expansion by getFullPathName) on fileHandle, truncating existing
// content, and logs "[ERROR]: Could not open <name>" on failure.
// On unix-like builds with USE_COMPRESSION, output to a ".gz"/".bz2" name is written to a named
// pipe that a forked child compresses into the real file with gzip/bzip2.
// NOTE(review): the open-failure test and the return statements are not visible in this excerpt.
1157 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1160 string completeFileName = getFullPathName(fileName);
1161 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1162 #ifdef USE_COMPRESSION
1163 // check for gzipped file
1164 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only produces a name; another process could create that path before mkfifo does
1165 string tempName = string(tmpnam(0));
1166 mkfifo(tempName.c_str(), 0666);
1167 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1168 int fork_result = fork();
1169 if (fork_result < 0) {
1170 cerr << "Error forking.\n";
1172 } else if (fork_result == 0) {
// child process: compress whatever arrives on the pipe into the requested file
// NOTE(review): the file name is placed in the shell command unquoted
1173 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1174 system(command.c_str());
// parent process: from here on, write to the pipe instead of the compressed file
1177 completeFileName = tempName;
1182 fileHandle.open(completeFileName.c_str(), ios::trunc);
1184 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1191 catch(exception& e) {
1192 errorOut(e, "MothurOut", "openOutputFile");
1198 /**************************************************************************************************/
1199 int MothurOut::appendFiles(string temp, string filename) {
1204 //open output file in append mode
1205 openOutputFileAppend(filename, output);
1206 int ableToOpen = openInputFile(temp, input, "no error");
1207 //int ableToOpen = openInputFile(temp, input);
1210 if (ableToOpen == 0) { //you opened it
1213 while (!input.eof()) {
1214 input.read(buffer, 4096);
1215 output.write(buffer, input.gcount());
1216 //count number of lines
1217 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1226 catch(exception& e) {
1227 errorOut(e, "MothurOut", "appendFiles");
1231 /**************************************************************************************************/
1232 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1237 //open output file in append mode
1238 openOutputFileAppend(filename, output);
1239 int ableToOpen = openInputFile(temp, input, "no error");
1240 //int ableToOpen = openInputFile(temp, input);
1243 if (ableToOpen == 0) { //you opened it
1245 string headers = getline(input); gobble(input);
1246 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1249 while (!input.eof()) {
1250 input.read(buffer, 4096);
1251 output.write(buffer, input.gcount());
1252 //count number of lines
1253 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1262 catch(exception& e) {
1263 errorOut(e, "MothurOut", "appendFiles");
1267 /**************************************************************************************************/
// Sorts the distance file distFile into getRootName(distFile) + "sorted.dist".
// unix-like builds shell out to `sort -n -k +3` (presumably a numeric sort on the third, distance, column).
// Other builds rewrite every record with the distance first, run the Windows `sort` on that copy,
// write the records back in name/name/distance order and remove both temporary files.
// NOTE(review): outputDir only appears in the commented-out line below - confirm it is meant to be unused.
// NOTE(review): file names are concatenated into the shell commands unquoted, so names containing
// spaces or shell metacharacters will break the sort call.
1268 string MothurOut::sortFile(string distFile, string outputDir){
1271 //if (outputDir == "") { outputDir += hasPath(distFile); }
1272 string outfile = getRootName(distFile) + "sorted.dist";
1275 //if you can, use the unix sort since its been optimized for years
1276 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1277 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1278 system(command.c_str());
1279 #else //you are stuck with my best attempt...
1280 //windows sort does not have a way to specify a column, only a character in the line
1281 //since we cannot assume that the distance will always be at the the same character location on each line
1282 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1284 //read in file line by file and put distance first
1285 string tempDistFile = distFile + ".temp";
1288 openInputFile(distFile, input);
1289 openOutputFile(tempDistFile, output);
1291 string firstName, secondName;
1293 while (!input.eof()) {
1294 input >> firstName >> secondName >> dist;
1295 output << dist << '\t' << firstName << '\t' << secondName << endl;
1302 //sort using windows sort
1303 string tempOutfile = outfile + ".temp";
1304 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1305 system(command.c_str());
1307 //read in sorted file and put distance at end again
1310 openInputFile(tempOutfile, input2);
1311 openOutputFile(outfile, output2);
1313 while (!input2.eof()) {
1314 input2 >> dist >> firstName >> secondName;
1315 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
// the two intermediate files are no longer needed once outfile has been written
1322 mothurRemove(tempDistFile);
1323 mothurRemove(tempOutfile);
1328 catch(exception& e) {
1329 errorOut(e, "MothurOut", "sortFile");
1333 /**************************************************************************************************/
// Builds a table of byte offsets for filename, which is opened in binary mode and read one character at a time.
// count is the number of characters read so far, so count-1 is the offset of the character just read.
// NOTE(review): the condition guarding positions.push_back(count-1) is not visible here; presumably
// an offset is recorded only when c is '>' - confirm.
// num receives the number of recorded offsets; the file size in bytes is pushed afterwards as the last entry.
// NOTE(review): the assignment to size and the handling of a failed fopen are not visible here;
// confirm size cannot be read uninitialized when the file cannot be opened.
1334 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1336 vector<unsigned long long> positions;
1338 //openInputFile(filename, inFASTA);
1339 inFASTA.open(filename.c_str(), ios::binary);
1342 unsigned long long count = 0;
1343 while(!inFASTA.eof()){
1344 //input = getline(inFASTA);
1345 //cout << input << '\t' << inFASTA.tellg() << endl;
1346 //if (input.length() != 0) {
1347 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1349 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1350 char c = inFASTA.get(); count++;
1352 positions.push_back(count-1);
1353 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
// num is taken before the file size is appended, so it counts recorded offsets only
1358 num = positions.size();
1359 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
1361 unsigned long long size;
1363 //get num bytes in file
1364 pFile = fopen (filename.c_str(),"rb");
1365 if (pFile==NULL) perror ("Error opening file");
1367 fseek (pFile, 0, SEEK_END);
1372 /*unsigned long long size = positions[(positions.size()-1)];
1374 openInputFile(filename, in);
1379 if(in.eof()) { break; }
1384 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1386 positions.push_back(size);
1391 catch(exception& e) {
1392 errorOut(e, "MothurOut", "setFilePosFasta");
1396 //**********************************************************************************************************************
// Reads inputfile into a vector of consTax records and returns it.
// Each record is extracted as three whitespace-separated fields: otu, size, tax.
// tax starts out as "unknown" and otu as "" before every extraction.
// Reading stops early when control_pressed is set.
// NOTE(review): the stream declaration, the loop header and any header-line handling are not visible here.
1397 vector<consTax> MothurOut::readConsTax(string inputfile){
1400 vector<consTax> taxes;
1403 openInputFile(inputfile, in);
1410 if (control_pressed) { break; }
1412 string otu = ""; string tax = "unknown";
1415 in >> otu >> size >> tax; gobble(in);
// consTax is built in (otu, tax, size) order, which differs from the order the fields are read in
1416 consTax temp(otu, tax, size);
1417 taxes.push_back(temp);
1423 catch(exception& e) {
1424 errorOut(e, "MothurOut", "readConsTax");
1428 //**********************************************************************************************************************
// Map-filling overload: reads inputfile record by record as otu, size, tax and builds a consTax2(tax, size) for each.
// Reading stops early when control_pressed is set.
// NOTE(review): the statement that stores temp into taxes and the return value are not visible
// here; presumably taxes is keyed by otu - confirm.
1429 int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
1432 openInputFile(inputfile, in);
1439 if (control_pressed) { break; }
1441 string otu = ""; string tax = "unknown";
1444 in >> otu >> size >> tax; gobble(in);
1445 consTax2 temp(tax, size);
1452 catch(exception& e) {
1453 errorOut(e, "MothurOut", "readConsTax");
1457 /**************************************************************************************************/
1458 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1460 filename = getFullPathName(filename);
1462 vector<unsigned long long> positions;
1464 //openInputFile(filename, in);
1465 in.open(filename.c_str(), ios::binary);
1468 unsigned long long count = 0;
1469 positions.push_back(0);
1472 //getline counting reads
1473 char d = in.get(); count++;
1474 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
1475 //get next character
1481 d=in.get(); count++;
1482 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
1484 positions.push_back(count-1);
1485 //cout << count-1 << endl;
1489 num = positions.size()-1;
1492 unsigned long long size;
1494 //get num bytes in file
1495 pFile = fopen (filename.c_str(),"rb");
1496 if (pFile==NULL) perror ("Error opening file");
1498 fseek (pFile, 0, SEEK_END);
1503 positions[(positions.size()-1)] = size;
1507 catch(exception& e) {
1508 errorOut(e, "MothurOut", "setFilePosEachLine");
1512 /**************************************************************************************************/
1514 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1516 vector<unsigned long long> filePos;
1517 filePos.push_back(0);
1520 unsigned long long size;
1522 filename = getFullPathName(filename);
1524 //get num bytes in file
1525 pFile = fopen (filename.c_str(),"rb");
1526 if (pFile==NULL) perror ("Error opening file");
1528 fseek (pFile, 0, SEEK_END);
1533 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1535 //estimate file breaks
1536 unsigned long long chunkSize = 0;
1537 chunkSize = size / proc;
1539 //file to small to divide by processors
1540 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1542 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1543 for (int i = 0; i < proc; i++) {
1544 unsigned long long spot = (i+1) * chunkSize;
1547 openInputFile(filename, in);
1551 unsigned long long newSpot = spot;
1555 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
1556 else if (int(c) == -1) { break; }
1560 //there was not another sequence before the end of the file
1561 unsigned long long sanityPos = in.tellg();
1563 if (sanityPos == -1) { break; }
1564 else { filePos.push_back(newSpot); }
1570 filePos.push_back(size);
1572 //sanity check filePos
1573 for (int i = 0; i < (filePos.size()-1); i++) {
1574 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1577 proc = (filePos.size() - 1);
1579 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1581 filePos.push_back(size);
1585 catch(exception& e) {
1586 errorOut(e, "MothurOut", "divideFile");
1590 /**************************************************************************************************/
1592 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1594 vector<unsigned long long> filePos;
1595 filePos.push_back(0);
1598 unsigned long long size;
1600 filename = getFullPathName(filename);
1602 //get num bytes in file
1603 pFile = fopen (filename.c_str(),"rb");
1604 if (pFile==NULL) perror ("Error opening file");
1606 fseek (pFile, 0, SEEK_END);
1611 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1613 //estimate file breaks
1614 unsigned long long chunkSize = 0;
1615 chunkSize = size / proc;
1617 //file to small to divide by processors
1618 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1620 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1621 for (int i = 0; i < proc; i++) {
1622 unsigned long long spot = (i+1) * chunkSize;
1625 openInputFile(filename, in);
1628 //look for next line break
1629 unsigned long long newSpot = spot;
1633 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1634 else if (int(c) == -1) { break; }
1637 //there was not another line before the end of the file
1638 unsigned long long sanityPos = in.tellg();
1640 if (sanityPos == -1) { break; }
1641 else { filePos.push_back(newSpot); }
1647 filePos.push_back(size);
1649 //sanity check filePos
1650 for (int i = 0; i < (filePos.size()-1); i++) {
1651 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1654 proc = (filePos.size() - 1);
1656 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1658 filePos.push_back(size);
1662 catch(exception& e) {
1663 errorOut(e, "MothurOut", "divideFile");
1667 /**************************************************************************************************/
1668 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1671 vector<unsigned long long> filePos = divideFile(filename, proc);
1673 for (int i = 0; i < (filePos.size()-1); i++) {
1677 openInputFile(filename, in);
1678 in.seekg(filePos[i]);
1679 unsigned long long size = filePos[(i+1)] - filePos[i];
1680 char* chunk = new char[size];
1681 in.read(chunk, size);
1685 string fileChunkName = filename + "." + toString(i) + ".tmp";
1687 openOutputFile(fileChunkName, out);
1689 out << chunk << endl;
1694 files.push_back(fileChunkName);
1699 catch(exception& e) {
1700 errorOut(e, "MothurOut", "divideFile");
1704 /***********************************************************************/
1706 bool MothurOut::isTrue(string f){
1709 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1711 if ((f == "TRUE") || (f == "T")) { return true; }
1712 else { return false; }
1714 catch(exception& e) {
1715 errorOut(e, "MothurOut", "isTrue");
1720 /***********************************************************************/
1722 float MothurOut::roundDist(float dist, int precision){
1724 return int(dist * precision + 0.5)/float(precision);
1726 catch(exception& e) {
1727 errorOut(e, "MothurOut", "roundDist");
1731 /***********************************************************************/
1733 float MothurOut::ceilDist(float dist, int precision){
1735 return int(ceil(dist * precision))/float(precision);
1737 catch(exception& e) {
1738 errorOut(e, "MothurOut", "ceilDist");
1742 /***********************************************************************/
1744 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1746 vector<string> pieces;
1748 for (int i = 0; i < size; i++) {
1749 if (!isspace(buffer[i])) { rest += buffer[i]; }
1751 if (rest != "") { pieces.push_back(rest); rest = ""; }
1752 while (i < size) { //gobble white space
1753 if (isspace(buffer[i])) { i++; }
1754 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1761 catch(exception& e) {
1762 errorOut(e, "MothurOut", "splitWhiteSpace");
1766 /***********************************************************************/
1767 vector<string> MothurOut::splitWhiteSpace(string input){
1769 vector<string> pieces;
1772 for (int i = 0; i < input.length(); i++) {
1773 if (!isspace(input[i])) { rest += input[i]; }
1775 if (rest != "") { pieces.push_back(rest); rest = ""; }
1776 while (i < input.length()) { //gobble white space
1777 if (isspace(input[i])) { i++; }
1778 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1783 if (rest != "") { pieces.push_back(rest); }
1787 catch(exception& e) {
1788 errorOut(e, "MothurOut", "splitWhiteSpace");
1792 /***********************************************************************/
// Splits input on whitespace like splitWhiteSpace(input), but text starting at a ' or " is
// collected up to the next ' or " so that quoted text containing spaces stays in one piece.
// Inputs without any quote character are handed straight to splitWhiteSpace(input).
// NOTE(review): the statements executed when the closing quote is found are not visible here,
// so whether the quote characters are kept in the piece cannot be confirmed from this view.
// NOTE(review): either quote character closes a quoted run, whichever one opened it.
// NOTE(review): the catch block reports "splitWhiteSpace" rather than this function's name.
1793 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1795 vector<string> pieces;
// find() results are stored in int; the comparisons with string::npos below rely on the implicit conversion
1798 int pos = input.find('\'');
1799 int pos2 = input.find('\"');
1801 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1803 for (int i = 0; i < input.length(); i++) {
1804 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1806 for (int j = i+1; j < input.length(); j++) {
1807 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
1811 }else { rest += input[j]; }
1813 }else if (!isspace(input[i])) { rest += input[i]; }
1815 if (rest != "") { pieces.push_back(rest); rest = ""; }
1816 while (i < input.length()) { //gobble white space
1817 if (isspace(input[i])) { i++; }
1818 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// whatever is still being collected when the input ends is the last piece
1823 if (rest != "") { pieces.push_back(rest); }
1827 catch(exception& e) {
1828 errorOut(e, "MothurOut", "splitWhiteSpace");
1832 //**********************************************************************************************************************
1833 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
1837 openInputFile(namefile, in);
1841 bool pairDone = false;
1842 bool columnOne = true;
1843 string firstCol, secondCol;
1846 if (control_pressed) { break; }
1848 in.read(buffer, 4096);
1849 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1851 for (int i = 0; i < pieces.size(); i++) {
1852 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1853 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1856 checkName(firstCol);
1857 //are there confidence scores, if so remove them
1858 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1859 map<string, string>::iterator itTax = taxMap.find(firstCol);
1861 if(itTax == taxMap.end()) {
1862 bool ignore = false;
1863 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1865 if (!ignore) { taxMap[firstCol] = secondCol; }
1866 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1868 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1877 vector<string> pieces = splitWhiteSpace(rest);
1879 for (int i = 0; i < pieces.size(); i++) {
1880 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1881 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1884 checkName(firstCol);
1885 //are there confidence scores, if so remove them
1886 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1887 map<string, string>::iterator itTax = taxMap.find(firstCol);
1889 if(itTax == taxMap.end()) {
1890 bool ignore = false;
1891 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1893 if (!ignore) { taxMap[firstCol] = secondCol; }
1894 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1896 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1904 return taxMap.size();
1907 catch(exception& e) {
1908 errorOut(e, "MothurOut", "readTax");
1912 /**********************************************************************************************************************/
// Reads a two-column names file and maps every name listed in the second column
// (comma separated) to the first-column name; returns nameMap.size().
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
// NOTE(review): redund is not referenced in the visible lines - confirm how it is used.
1913 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
1917 openInputFile(namefile, in);
1921 bool pairDone = false;
1922 bool columnOne = true;
1923 string firstCol, secondCol;
1926 if (control_pressed) { break; }
1928 in.read(buffer, 4096);
1929 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
1931 for (int i = 0; i < pieces.size(); i++) {
1932 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1933 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1936 checkName(firstCol);
1937 checkName(secondCol);
1939 //parse names into vector
1940 vector<string> theseNames;
1941 splitAtComma(secondCol, theseNames);
1942 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
// same processing for whatever is left in rest after the last chunk
1950 vector<string> pieces = splitWhiteSpace(rest);
1952 for (int i = 0; i < pieces.size(); i++) {
1953 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1954 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1957 checkName(firstCol);
1958 checkName(secondCol);
1960 //parse names into vector
1961 vector<string> theseNames;
1962 splitAtComma(secondCol, theseNames);
1963 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1969 return nameMap.size();
1972 catch(exception& e) {
1973 errorOut(e, "MothurOut", "readNames");
1977 /**********************************************************************************************************************/
// Reads a two-column names file and stores nameMap[secondCol] = firstCol for every pair, so
// the map is keyed by the whole second column; returns nameMap.size().
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
// NOTE(review): flip is not referenced in the visible lines; presumably it only selects this overload - confirm.
1978 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
1982 openInputFile(namefile, in);
1986 bool pairDone = false;
1987 bool columnOne = true;
1988 string firstCol, secondCol;
1991 if (control_pressed) { break; }
1993 in.read(buffer, 4096);
1994 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
1996 for (int i = 0; i < pieces.size(); i++) {
1997 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1998 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2001 checkName(firstCol);
2002 checkName(secondCol);
2003 nameMap[secondCol] = firstCol;
// same processing for whatever is left in rest after the last chunk
2011 vector<string> pieces = splitWhiteSpace(rest);
2013 for (int i = 0; i < pieces.size(); i++) {
2014 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2015 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2018 checkName(firstCol);
2019 checkName(secondCol);
2020 nameMap[secondCol] = firstCol;
2026 return nameMap.size();
2029 catch(exception& e) {
2030 errorOut(e, "MothurOut", "readNames");
2034 /**********************************************************************************************************************/
// Reads a two-column names file into two maps, both cleared first:
//   nameMap   - every name listed in the second column (comma separated) -> first-column name
//   nameCount - first-column name -> number of names listed in its second column
// Returns nameMap.size().
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
2035 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
2037 nameMap.clear(); nameCount.clear();
2040 openInputFile(namefile, in);
2044 bool pairDone = false;
2045 bool columnOne = true;
2046 string firstCol, secondCol;
2049 if (control_pressed) { break; }
2051 in.read(buffer, 4096);
2052 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
2054 for (int i = 0; i < pieces.size(); i++) {
2055 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2056 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2059 checkName(firstCol);
2060 checkName(secondCol);
2061 //parse names into vector
2062 vector<string> theseNames;
2063 splitAtComma(secondCol, theseNames);
2064 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2065 nameCount[firstCol] = theseNames.size();
// same processing for whatever is left in rest after the last chunk
2073 vector<string> pieces = splitWhiteSpace(rest);
2075 for (int i = 0; i < pieces.size(); i++) {
2076 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2077 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2080 checkName(firstCol);
2081 checkName(secondCol);
2082 //parse names into vector
2083 vector<string> theseNames;
2084 splitAtComma(secondCol, theseNames);
2085 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2086 nameCount[firstCol] = theseNames.size();
2092 return nameMap.size();
2095 catch(exception& e) {
2096 errorOut(e, "MothurOut", "readNames");
2100 /**********************************************************************************************************************/
// Reads a two-column names file and stores nameMap[firstCol] = secondCol for every pair (the
// second column is kept as one unsplit string); returns nameMap.size().
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
2101 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2105 openInputFile(namefile, in);
2109 bool pairDone = false;
2110 bool columnOne = true;
2111 string firstCol, secondCol;
2114 if (control_pressed) { break; }
2116 in.read(buffer, 4096);
2117 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
2119 for (int i = 0; i < pieces.size(); i++) {
2120 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2121 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2124 checkName(firstCol);
2125 checkName(secondCol);
2126 nameMap[firstCol] = secondCol; pairDone = false; }
// same processing for whatever is left in rest after the last chunk
2132 vector<string> pieces = splitWhiteSpace(rest);
2134 for (int i = 0; i < pieces.size(); i++) {
2135 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2136 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2139 checkName(firstCol);
2140 checkName(secondCol);
2141 nameMap[firstCol] = secondCol; pairDone = false; }
2145 return nameMap.size();
2148 catch(exception& e) {
2149 errorOut(e, "MothurOut", "readNames");
2153 /**********************************************************************************************************************/
// Reads a two-column names file and stores, for every first-column name, the vector of names
// obtained by splitting the second column at commas; returns nameMap.size().
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
2154 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2158 openInputFile(namefile, in);
2162 bool pairDone = false;
2163 bool columnOne = true;
2164 string firstCol, secondCol;
2167 if (control_pressed) { break; }
2169 in.read(buffer, 4096);
2170 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
2172 for (int i = 0; i < pieces.size(); i++) {
2173 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2174 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2177 checkName(firstCol);
2178 checkName(secondCol);
2179 vector<string> temp;
2180 splitAtComma(secondCol, temp);
2181 nameMap[firstCol] = temp;
// same processing for whatever is left in rest after the last chunk
2189 vector<string> pieces = splitWhiteSpace(rest);
2191 for (int i = 0; i < pieces.size(); i++) {
2192 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2193 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2196 checkName(firstCol);
2197 checkName(secondCol);
2198 vector<string> temp;
2199 splitAtComma(secondCol, temp);
2200 nameMap[firstCol] = temp;
2206 return nameMap.size();
2208 catch(exception& e) {
2209 errorOut(e, "MothurOut", "readNames");
2213 /**********************************************************************************************************************/
// Reads a two-column names file and returns a map from each first-column name to
// getNumNames(secondCol), the count computed from its second column.
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
2214 map<string, int> MothurOut::readNames(string namefile) {
2216 map<string, int> nameMap;
2220 openInputFile(namefile, in);
2224 bool pairDone = false;
2225 bool columnOne = true;
2226 string firstCol, secondCol;
2229 if (control_pressed) { break; }
2231 in.read(buffer, 4096);
2232 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
2234 for (int i = 0; i < pieces.size(); i++) {
2235 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2236 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2239 checkName(firstCol);
2240 checkName(secondCol);
2241 int num = getNumNames(secondCol);
2242 nameMap[firstCol] = num;
// same processing for whatever is left in rest after the last chunk
2250 vector<string> pieces = splitWhiteSpace(rest);
2251 for (int i = 0; i < pieces.size(); i++) {
2252 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2253 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2256 checkName(firstCol);
2257 checkName(secondCol);
2258 int num = getNumNames(secondCol);
2259 nameMap[firstCol] = num;
2268 catch(exception& e) {
2269 errorOut(e, "MothurOut", "readNames");
2273 /**********************************************************************************************************************/
// Reads a two-column names file and returns a map from each first-column name to
// getNumNames(secondCol), the count computed from its second column.
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
// NOTE(review): numSeqs is not referenced in the visible lines; presumably it accumulates the
// per-row counts - confirm.
2274 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2276 map<string, int> nameMap;
2281 openInputFile(namefile, in);
2285 bool pairDone = false;
2286 bool columnOne = true;
2287 string firstCol, secondCol;
2290 if (control_pressed) { break; }
2292 in.read(buffer, 4096);
2293 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
2295 for (int i = 0; i < pieces.size(); i++) {
2296 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2297 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2300 checkName(firstCol);
2301 checkName(secondCol);
2302 int num = getNumNames(secondCol);
2303 nameMap[firstCol] = num;
// same processing for whatever is left in rest after the last chunk
2312 vector<string> pieces = splitWhiteSpace(rest);
2313 for (int i = 0; i < pieces.size(); i++) {
2314 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2315 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2318 checkName(firstCol);
2319 checkName(secondCol);
2320 int num = getNumNames(secondCol);
2321 nameMap[firstCol] = num;
2331 catch(exception& e) {
2332 errorOut(e, "MothurOut", "readNames");
2336 /************************************************************/
2337 int MothurOut::checkName(string& name) {
2340 for (int i = 0; i < name.length(); i++) {
2341 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2346 catch(exception& e) {
2347 errorOut(e, "MothurOut", "checkName");
2351 /**********************************************************************************************************************/
// Reads a two-column names file and, for every first-column name found in fastamap, appends a
// seqPriorityNode(count, fastamap value, name) to nameVector, where count is getNumNames(secondCol).
// A name missing from fastamap is reported with an [ERROR] message instead of being added.
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
// NOTE(review): the return value is not visible here; presumably it signals whether any name
// was missing - confirm.
2352 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2358 openInputFile(namefile, in);
2362 bool pairDone = false;
2363 bool columnOne = true;
2364 string firstCol, secondCol;
2367 if (control_pressed) { break; }
2369 in.read(buffer, 4096);
2370 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and the second column
2372 for (int i = 0; i < pieces.size(); i++) {
2373 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2374 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2377 checkName(firstCol);
2378 checkName(secondCol);
2379 int num = getNumNames(secondCol);
2381 map<string, string>::iterator it = fastamap.find(firstCol);
2382 if (it == fastamap.end()) {
2384 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2386 seqPriorityNode temp(num, it->second, firstCol);
2387 nameVector.push_back(temp);
// same processing for whatever is left in rest after the last chunk
2397 vector<string> pieces = splitWhiteSpace(rest);
2399 for (int i = 0; i < pieces.size(); i++) {
2400 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2401 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2404 checkName(firstCol);
2405 checkName(secondCol);
2406 int num = getNumNames(secondCol);
2408 map<string, string>::iterator it = fastamap.find(firstCol);
2409 if (it == fastamap.end()) {
2411 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2413 seqPriorityNode temp(num, it->second, firstCol);
2414 nameVector.push_back(temp);
2423 catch(exception& e) {
2424 errorOut(e, "MothurOut", "readNames");
2428 //**********************************************************************************************************************
// Reads accnosfile as a whitespace-separated list of names and collects them in a set.
// Each name is passed through checkName() before it is inserted.
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
2429 set<string> MothurOut::readAccnos(string accnosfile){
2433 openInputFile(accnosfile, in);
2440 if (control_pressed) { break; }
2442 in.read(buffer, 4096);
2443 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2445 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
2446 names.insert(pieces[i]);
// names still held in rest after the last chunk
2452 vector<string> pieces = splitWhiteSpace(rest);
2453 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2457 catch(exception& e) {
2458 errorOut(e, "MothurOut", "readAccnos");
2462 //**********************************************************************************************************************
// Reads accnosfile as a whitespace-separated list of names and appends them to names in file order.
// Each name is passed through checkName() before it is appended; duplicates are kept.
// The file is read in 4096-byte chunks split with splitWhiteSpace; `rest` carries a token cut
// by a chunk boundary and is processed once more after the last chunk.
// NOTE(review): the return value is not visible here - confirm.
2463 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2467 openInputFile(accnosfile, in);
2474 if (control_pressed) { break; }
2476 in.read(buffer, 4096);
2477 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2479 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
// names still held in rest after the last chunk
2484 vector<string> pieces = splitWhiteSpace(rest);
2485 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2490 catch(exception& e) {
2491 errorOut(e, "MothurOut", "readAccnos");
2495 /***********************************************************************/
// Scans names character by character, looking for ','.
// NOTE(review): the counter and the return statement are not visible here; presumably this
// returns the number of comma-separated names in the string - confirm, including the result
// for an empty string.
2497 int MothurOut::getNumNames(string names){
2503 for(int i=0;i<names.size();i++){
2504 if(names[i] == ','){
2512 catch(exception& e) {
2513 errorOut(e, "MothurOut", "getNumNames");
2517 /***********************************************************************/
// Walks over every character of line.
// NOTE(review): the loop body and the return statement are not visible here; presumably this
// returns how many times c occurs in line - confirm.
2519 int MothurOut::getNumChar(string line, char c){
2524 for(int i=0;i<line.size();i++){
2533 catch(exception& e) {
2534 errorOut(e, "MothurOut", "getNumChar");
2538 //**********************************************************************************************************************
2539 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2543 if (subset.size() > bigset.size()) { return false; }
2545 //check if each guy in suset is also in bigset
2546 for (int i = 0; i < subset.size(); i++) {
2548 for (int j = 0; j < bigset.size(); j++) {
2549 if (subset[i] == bigset[j]) { match = true; break; }
2552 //you have a guy in subset that had no match in bigset
2553 if (match == false) { return false; }
2559 catch(exception& e) {
2560 errorOut(e, "MothurOut", "isSubset");
2564 /***********************************************************************/
// Deletes `filename` from disk: the name is first expanded with
// getFullPathName() and then handed to remove().
// The result of remove() is stored in `error`; the errno/perror reporting
// below is commented out, so a failed delete prints nothing from here.
// NOTE(review): what is returned to the caller is not visible in this extract.
2565 int MothurOut::mothurRemove(string filename){
2567 filename = getFullPathName(filename);
2568 int error = remove(filename.c_str());
2570 // if (errno != ENOENT) { //ENOENT == file does not exist
2571 // string message = "Error deleting file " + filename;
2572 // perror(message.c_str());
2577 catch(exception& e) {
2578 errorOut(e, "MothurOut", "mothurRemove");
2582 /***********************************************************************/
// Converts `item` to an int in `num`, guarded by isNumeric1().
// When isNumeric1() rejects the text an "[ERROR]" line is printed and
// commandInputsConvertError is set to true.
// NOTE(review): the conversion call and the bool return values are not
// visible here; presumably true on success and false on failure.
2583 bool MothurOut::mothurConvert(string item, int& num){
2587 if (isNumeric1(item)) {
2592 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2593 commandInputsConvertError = true;
2598 catch(exception& e) {
2599 errorOut(e, "MothurOut", "mothurConvert");
2603 /***********************************************************************/
// Same contract as the int overload, but the target `num` is an intDist
// (a project-defined type; its definition is not visible here).
// When isNumeric1() rejects the text an "[ERROR]" line is printed and
// commandInputsConvertError is set to true.
// NOTE(review): the conversion call and return values are not visible here.
2604 bool MothurOut::mothurConvert(string item, intDist& num){
2608 if (isNumeric1(item)) {
2613 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2614 commandInputsConvertError = true;
2619 catch(exception& e) {
2620 errorOut(e, "MothurOut", "mothurConvert");
2625 /***********************************************************************/
// Character-set check used as a "looks numeric" test: `numeric` becomes true
// when `stringToCheck` contains no character other than the digits 0-9,
// '.' and '-'.
// NOTE(review): this does not validate number syntax. The empty string and
// texts such as "-", "..", or "1-2" also pass, because find_first_not_of()
// returns npos for them; exponent notation ("1e-3") is rejected.
// `numeric` is presumably the return value (return not visible here).
2626 bool MothurOut::isNumeric1(string stringToCheck){
2628 bool numeric = false;
2630 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2634 catch(exception& e) {
2635 errorOut(e, "MothurOut", "isNumeric1");
2640 /***********************************************************************/
// Converts `item` to a float in `num`, guarded by isNumeric1().
// When isNumeric1() rejects the text an "[ERROR]" line is printed and
// commandInputsConvertError is set to true.
// NOTE(review): the conversion call and return values are not visible here.
2641 bool MothurOut::mothurConvert(string item, float& num){
2645 if (isNumeric1(item)) {
2650 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2651 commandInputsConvertError = true;
2656 catch(exception& e) {
2657 errorOut(e, "MothurOut", "mothurConvert");
2661 /***********************************************************************/
// Converts `item` to a double in `num`, guarded by isNumeric1().
// When isNumeric1() rejects the text an "[ERROR]" line is printed and
// commandInputsConvertError is set to true.
// NOTE(review): the conversion call and return values are not visible here.
2662 bool MothurOut::mothurConvert(string item, double& num){
2666 if (isNumeric1(item)) {
2671 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2672 commandInputsConvertError = true;
2677 catch(exception& e) {
2678 errorOut(e, "MothurOut", "mothurConvert");
2682 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of binomial coefficients stored
// as doubles, where binomial[i][j] holds "i choose j".
// Rows from 2 upward are filled with Pascal's rule
//   binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j],
// with 0 written for j > i.
// NOTE(review): the initialisation of rows 0/1 and column 0, and the return
// of the table, are not visible in this extract.
2684 vector<vector<double> > MothurOut::binomial(int maxOrder){
2686 vector<vector<double> > binomial(maxOrder+1);
2688 for(int i=0;i<=maxOrder;i++){
2689 binomial[i].resize(maxOrder+1);
2698 for(int i=2;i<=maxOrder;i++){
2702 for(int i=2;i<=maxOrder;i++){
2703 for(int j=1;j<=maxOrder;j++){
2704 if(i==j){ binomial[i][j]=1; }
// not an else-if: for i == j the else branch below also runs and
// reassigns the cell from the recurrence
2705 if(j>i) { binomial[i][j]=0; }
2706 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2713 catch(exception& e) {
2714 errorOut(e, "MothurOut", "binomial");
2718 /**************************************************************************************************/
// Decodes the base-36 text `base36` into an unsigned int, most significant
// digit first: num = 36 * num + value(digit) for each character.
// Digit values come from the local `converts` map (its population is not
// visible in this extract).
// NOTE(review): `converts[c]` uses map::operator[], so a character missing
// from the map is inserted with value 0 and silently contributes 0; there is
// no overflow check, so long inputs wrap modulo the range of unsigned int.
2719 unsigned int MothurOut::fromBase36(string base36){
2721 unsigned int num = 0;
2723 map<char, int> converts;
2788 while (i < base36.length()) {
2790 num = 36 * num + converts[c];
2797 catch(exception& e) {
2798 errorOut(e, "MothurOut", "fromBase36");
2802 /***********************************************************************/
// Iterates i = 1..num; presumably multiplies a running product and returns
// num! (accumulator and return are not visible here -- TODO confirm).
// NOTE(review): the result type is int, so large `num` cannot be represented.
2804 int MothurOut::factorial(int num){
2808 for (int i = 1; i <= num; i++) {
2814 catch(exception& e) {
2815 errorOut(e, "MothurOut", "factorial");
2819 /***********************************************************************/
// Counts the '>' characters from the current read position of `file` to end
// of stream, using std::count over istreambuf_iterator; the stream is
// consumed in the process.
// NOTE(review): every '>' is counted, not only those that start a line.
// `numSeqs` is presumably the return value (return not visible here).
2821 int MothurOut::getNumSeqs(ifstream& file){
2823 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
2827 catch(exception& e) {
2828 errorOut(e, "MothurOut", "getNumSeqs");
2832 /***********************************************************************/
// Line-based variant: reads lines from `file` with getline(file) and
// increments `numSeqs` for each non-empty line whose first character is '>'.
// NOTE(review): the loop condition and any initialisation of `numSeqs` are
// not visible in this extract.
2833 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
2838 input = getline(file);
2839 if (input.length() != 0) {
2840 if(input[0] == '>'){ numSeqs++; }
2844 catch(exception& e) {
2845 errorOut(e, "MothurOut", "getNumSeqs");
2849 /***********************************************************************/
2851 //Splits `estim` at every occurrence of `symbol` and appends the pieces to `container`.
// A '-' separator is delegated to splitAtDash(), which understands "\-" escapes.
// For any other separator the text is scanned once: characters accumulate in
// `individual`, each separator pushes the accumulated piece, and the trailing
// piece is pushed after the loop (so an empty input still adds one "" entry).
// NOTE(review): the reset of `individual` after a push is not visible here.
2852 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
2855 if (symbol == '-') { splitAtDash(estim, container); return; }
2857 string individual = "";
2858 int estimLength = estim.size();
2859 for(int i=0;i<estimLength;i++){
2860 if(estim[i] == symbol){
2861 container.push_back(individual);
2865 individual += estim[i];
2868 container.push_back(individual);
2871 catch(exception& e) {
2872 errorOut(e, "MothurOut", "splitAtChar");
2877 /***********************************************************************/
2879 //Splits `estim` at unescaped dashes and appends the pieces to `container`.
// Escape handling in the active loop: a backslash immediately followed by a
// dash sets prevEscape and is dropped, and the following dash is then kept
// as a literal character. Any other backslash (including one at the very end
// of the text) is copied through unchanged. The trailing piece is pushed
// after the loop.
// NOTE(review): the "/*for(...)" line opens a block comment holding an older
// version of the loop; its closing delimiter and the reset of `individual`
// are not visible in this extract.
2880 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
2882 string individual = "";
2883 int estimLength = estim.size();
2884 bool prevEscape = false;
2885 /*for(int i=0;i<estimLength;i++){
2887 individual += estim[i];
2891 if(estim[i] == '\\'){
2894 else if(estim[i] == '-'){
2895 container.push_back(individual);
2900 individual += estim[i];
2907 for(int i=0;i<estimLength;i++){
2908 if(estim[i] == '-'){
2909 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2911 container.push_back(individual);
2914 }else if(estim[i] == '\\'){
2915 if (i < estimLength-1) {
2916 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2917 else { individual += estim[i]; prevEscape = false; } //if no, add in
2918 }else { individual += estim[i]; }
2920 individual += estim[i];
2926 container.push_back(individual);
2928 catch(exception& e) {
2929 errorOut(e, "MothurOut", "splitAtDash");
2934 /***********************************************************************/
2935 //Splits the label string `estim` at unescaped dashes and inserts the pieces into the set `container`.
// Escape handling in the second loop: a backslash immediately followed by a
// dash sets prevEscape and is dropped, and the following dash is then kept
// as a literal character. Any other backslash is copied through unchanged.
// The trailing piece is inserted after the loop. Because the target is a
// set, duplicate labels collapse and input order is not kept.
// NOTE(review): the first loop looks like the older implementation that the
// vector overload keeps inside a block comment; no comment delimiters are
// visible around it in this extract -- confirm against the full file.
2936 void MothurOut::splitAtDash(string& estim, set<string>& container) {
2938 string individual = "";
2939 int estimLength = estim.size();
2940 bool prevEscape = false;
2942 for(int i=0;i<estimLength;i++){
2944 individual += estim[i];
2948 if(estim[i] == '\\'){
2951 else if(estim[i] == '-'){
2952 container.insert(individual);
2957 individual += estim[i];
2964 for(int i=0;i<estimLength;i++){
2965 if(estim[i] == '-'){
2966 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2968 container.insert(individual);
2971 }else if(estim[i] == '\\'){
2972 if (i < estimLength-1) {
2973 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2974 else { individual += estim[i]; prevEscape = false; } //if no, add in
2975 }else { individual += estim[i]; }
2977 individual += estim[i];
2980 container.insert(individual);
2983 catch(exception& e) {
2984 errorOut(e, "MothurOut", "splitAtDash");
2988 /***********************************************************************/
2989 //Splits the line-number string `estim` at unescaped dashes and inserts each piece, converted to int, into `container`.
// Each piece is converted with convert(individual, lineNum) before insertion;
// the trailing piece is converted and inserted after the loop.
// Escape handling matches the string overloads: "\-" yields a literal dash
// inside the piece, any other backslash is copied through.
// NOTE(review): the first loop looks like the older implementation that the
// vector overload keeps inside a block comment; no comment delimiters, nor
// the declaration of lineNum, are visible in this extract -- confirm against
// the full file.
2990 void MothurOut::splitAtDash(string& estim, set<int>& container) {
2992 string individual = "";
2994 int estimLength = estim.size();
2995 bool prevEscape = false;
2997 for(int i=0;i<estimLength;i++){
2999 individual += estim[i];
3003 if(estim[i] == '\\'){
3006 else if(estim[i] == '-'){
3007 convert(individual, lineNum); //convert the string to int
3008 container.insert(lineNum);
3013 individual += estim[i];
3019 for(int i=0;i<estimLength;i++){
3020 if(estim[i] == '-'){
3021 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3023 convert(individual, lineNum); //convert the string to int
3024 container.insert(lineNum);
3027 }else if(estim[i] == '\\'){
3028 if (i < estimLength-1) {
3029 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3030 else { individual += estim[i]; prevEscape = false; } //if no, add in
3031 }else { individual += estim[i]; }
3033 individual += estim[i];
3037 convert(individual, lineNum); //convert the string to int
3038 container.insert(lineNum);
3040 catch(exception& e) {
3041 errorOut(e, "MothurOut", "splitAtDash");
3046 /***********************************************************************/
// Joins `names` into one comma-separated string held in `list`.
// An empty vector returns `list` untouched; the early return also keeps the
// unsigned expression names.size()-1 below from wrapping around.
// All elements but the last are appended with a trailing ",", then the last
// element is appended without one.
// NOTE(review): the declaration of `list` and the final return are not
// visible in this extract.
3047 string MothurOut::makeList(vector<string>& names) {
3051 if (names.size() == 0) { return list; }
3053 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
3056 list += names[names.size()-1];
3060 catch(exception& e) {
3061 errorOut(e, "MothurOut", "makeList");
3066 /***********************************************************************/
3067 //Splits `estim` at every comma and appends the pieces to `container`.
// The text is scanned once: characters accumulate in `individual`, each comma
// pushes the accumulated piece, and the trailing piece is pushed after the
// loop (so an empty input still adds one "" entry). `estim` is only read.
// The commented-out lines further down are an older find_first_of/substr
// implementation that consumed `estim` while splitting.
// NOTE(review): the reset of `individual` after a push is not visible here.
3068 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
3070 string individual = "";
3071 int estimLength = estim.size();
3072 for(int i=0;i<estimLength;i++){
3073 if(estim[i] == ','){
3074 container.push_back(individual);
3078 individual += estim[i];
3081 container.push_back(individual);
3086 // string individual;
3088 // while (estim.find_first_of(',') != -1) {
3089 // individual = estim.substr(0,estim.find_first_of(','));
3090 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
3091 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
3092 // container.push_back(individual);
3096 // container.push_back(estim);
3098 catch(exception& e) {
3099 errorOut(e, "MothurOut", "splitAtComma");
3103 /***********************************************************************/
3104 //Peels the first `c`-delimited field off `suffix`: `prefix` receives the text before the first `c`, `suffix` keeps the rest.
// After the call `suffix` holds the text after the separator with leading
// spaces removed, or "" when nothing follows the separator.
// NOTE(review): when `c` does not occur, find_first_of() returns npos;
// npos+2 wraps to 1, so for any non-empty `suffix` the branch is taken and
// substr(npos+1 == 0) leaves `suffix` unchanged while `prefix` receives the
// whole string. Callers must test for the separator themselves.
// NOTE(review): if only spaces follow the separator, the strip loop empties
// `suffix` and suffix.at(0) throws std::out_of_range, which ends up in the
// catch handler below.
3105 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3107 prefix = suffix.substr(0,suffix.find_first_of(c));
3108 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //require at least one character after the separator
3109 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
3111 while(suffix.at(0) == ' ')
3112 suffix = suffix.substr(1, suffix.length());
3113 }else { suffix = ""; }
3116 catch(exception& e) {
3117 errorOut(e, "MothurOut", "splitAtChar");
3122 /***********************************************************************/
3124 //Peels the first comma-delimited option off `suffix`: `prefix` receives the text before the first comma, `suffix` keeps the rest.
// After the call `suffix` holds the text after the comma with leading spaces
// removed, or "" when nothing follows the comma.
// NOTE(review): when there is no comma, find_first_of() returns npos;
// npos+2 wraps to 1, so for any non-empty `suffix` the branch is taken and
// `suffix` is left unchanged while `prefix` receives the whole string.
// NOTE(review): if only spaces follow the comma, suffix.at(0) eventually
// throws std::out_of_range, which ends up in the catch handler below.
3125 void MothurOut::splitAtComma(string& prefix, string& suffix){
3127 prefix = suffix.substr(0,suffix.find_first_of(','));
3128 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3129 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3131 while(suffix.at(0) == ' ')
3132 suffix = suffix.substr(1, suffix.length());
3133 }else { suffix = ""; }
3136 catch(exception& e) {
3137 errorOut(e, "MothurOut", "splitAtComma");
3141 /***********************************************************************/
3143 //This function separates the key from the option value, e.g. for "dist=96_..." key becomes "dist" and value the text after '='.
// Only acts when `value` contains '='; the comparison with -1 relies on -1
// converting to string::npos.
// The inner length test is always true once '=' has been found (its index is
// smaller than the length), so `value` becomes the possibly empty text after
// the first '='.
// NOTE(review): what happens when no '=' is present is not visible here.
3144 void MothurOut::splitAtEquals(string& key, string& value){
3146 if(value.find_first_of('=') != -1){
3147 key = value.substr(0,value.find_first_of('='));
3148 if ((value.find_first_of('=')+1) <= value.length()) {
3149 value = value.substr(value.find_first_of('=')+1, value.length());
3156 catch(exception& e) {
3157 errorOut(e, "MothurOut", "splitAtEquals");
3162 /**************************************************************************************************/
// Linear search: returns true as soon as `groupname` equals an entry of
// `Groups` (taken by value, i.e. copied per call).
// The no-match return (presumably false) is not visible in this extract.
3164 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3166 for (int i = 0; i < Groups.size(); i++) {
3167 if (groupname == Groups[i]) { return true; }
3171 catch(exception& e) {
3172 errorOut(e, "MothurOut", "inUsersGroups");
3176 /**************************************************************************************************/
// Linear search: returns true as soon as the vector `set` compares equal
// (same elements in the same order, via vector operator==) to an entry of
// `sets`. Both arguments are taken by value.
// The no-match return (presumably false) is not visible in this extract.
3178 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3180 for (int i = 0; i < sets.size(); i++) {
3181 if (set == sets[i]) { return true; }
3185 catch(exception& e) {
3186 errorOut(e, "MothurOut", "inUsersGroups");
3190 /**************************************************************************************************/
// Linear search: returns true as soon as the int `groupname` equals an entry
// of `Groups` (taken by value).
// The no-match return (presumably false) is not visible in this extract.
3192 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3194 for (int i = 0; i < Groups.size(); i++) {
3195 if (groupname == Groups[i]) { return true; }
3199 catch(exception& e) {
3200 errorOut(e, "MothurOut", "inUsersGroups");
3205 /**************************************************************************************************/
3206 //returns true if any of the strings in the first vector are in the second vector
// Each name is checked with the single-name overload, which takes `Groups`
// by value, so `Groups` is copied once per element of `groupnames`.
// The no-match return (presumably false) is not visible in this extract.
3207 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3210 for (int i = 0; i < groupnames.size(); i++) {
3211 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3215 catch(exception& e) {
3216 errorOut(e, "MothurOut", "inUsersGroups");
3220 /***********************************************************************/
3221 //this function determines if the user has given us labels that are smaller than the given label.
3222 //if so then it returns true so that the calling function can run the previous valid distance.
3223 //it's a "smart" distance function. It also checks for invalid labels.
// Parameters:
//   label      - label currently being processed; "unique" returns false at
//                once because it is treated as the smallest label.
//   userLabels - labels requested by the user; MODIFIED: entries that are not
//                numeric (and not "unique") and entries smaller than `label`
//                are erased.
//   errorOff   - messages are printed only when this is the empty string.
// Numeric user labels are kept both as floats (orderFloat, sorted ascending)
// and in userMap (original text -> float) so the original spelling can be
// recovered for messages and erasure; "unique" is represented as -1.0.
// NOTE(review): the handling of a non-numeric `label`, the assignments to
// `smaller` and the return statement are not visible in this extract.
3224 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3227 set<string>::iterator it;
3228 vector<float> orderFloat;
3229 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3230 map<string, float>::iterator it2;
3232 bool smaller = false;
3234 //unique is the smallest line
3235 if (label == "unique") { return false; }
3237 if (convertTestFloat(label, labelFloat)) {
3238 convert(label, labelFloat);
3239 }else { //cant convert
3244 //go through users set and make them floats
// no increment in the loop header: `it` is advanced inside the branches so
// that erasing the current element stays valid (see erase(it++) below)
3245 for(it = userLabels.begin(); it != userLabels.end();) {
3248 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3250 orderFloat.push_back(temp);
3251 userMap[*it] = temp;
3253 }else if (*it == "unique") {
3254 orderFloat.push_back(-1.0);
3255 userMap["unique"] = -1.0;
3258 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
3259 userLabels.erase(it++);
3264 sort(orderFloat.begin(), orderFloat.end());
3266 /*************************************************/
3267 //is this label bigger than any of the users labels
3268 /*************************************************/
3270 //loop through order until you find a label greater than label
3271 for (int i = 0; i < orderFloat.size(); i++) {
3272 if (orderFloat[i] < labelFloat) {
3274 if (orderFloat[i] == -1) {
3275 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3276 userLabels.erase("unique");
3279 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// recover the user's original spelling of this value from userMap
3281 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3282 if (it2->second == orderFloat[i]) {
3284 //remove small labels
3285 userLabels.erase(s);
3289 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3291 //since they are sorted once you find a bigger one stop looking
3298 catch(exception& e) {
3299 errorOut(e, "MothurOut", "anyLabelsToProcess");
3304 /**************************************************************************************************/
// Checks the version stamp on the first line of `file` against `version`.
// The first line must start with '#'; the text after it and `version` are
// both split at '.' and compared component by component after convert().
// `good` is cleared when the '#' is missing, when the two versions have a
// different number of components, or when a component of `version` is
// greater than the file's component at the same position.
// Side effect on the stream: closed when not good, otherwise rewound with
// seekg(0).
// NOTE(review): line[0] is read without an emptiness check; whether the
// component loop is skipped after a size mismatch, and the return statement
// (presumably `good`), are not visible in this extract.
3305 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3310 string line = getline(file);
3312 //before we added this check
3313 if (line[0] != '#') { good = false; }
3316 line = line.substr(1);
3318 vector<string> versionVector;
3319 splitAtChar(version, versionVector, '.');
3321 //check file version
3322 vector<string> linesVector;
3323 splitAtChar(line, linesVector, '.');
3325 if (versionVector.size() != linesVector.size()) { good = false; }
3327 for (int j = 0; j < versionVector.size(); j++) {
3329 convert(versionVector[j], num1);
3330 convert(linesVector[j], num2);
3332 //if mothurs version is newer than this files version, then we want to remake it
3333 if (num1 > num2) { good = false; break; }
3339 if (!good) { file.close(); }
3340 else { file.seekg(0); }
3344 catch(exception& e) {
3345 errorOut(e, "MothurOut", "checkReleaseVersion");
3349 /**************************************************************************************************/
// Column-wise mean of `dists`: averages[i] is the mean of dists[iter][i]
// over all rows (iterations).
// The result is sized from dists[0], so `dists` must hold at least one row,
// and no row may be longer than the first (averages[i] is indexed with each
// row's own length).
// NOTE(review): the return of `averages` is not visible in this extract.
3350 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3352 vector<double> averages; //averages.resize(numComp, 0.0);
3353 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3355 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3356 for (int i = 0; i < dists[thisIter].size(); i++) {
3357 averages[i] += dists[thisIter][i];
3362 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3366 catch(exception& e) {
3367 errorOut(e, "MothurOut", "getAverages");
3371 /**************************************************************************************************/
// Arithmetic mean of `dists` (taken by value): the elements are summed into
// `average`, which is then divided by dists.size().
// NOTE(review): an empty vector divides by zero; the declaration of
// `average` and the return are not visible in this extract.
3372 double MothurOut::getAverage(vector<double> dists) {
3376 for (int i = 0; i < dists.size(); i++) {
3377 average += dists[i];
3381 average /= (double) dists.size();
3385 catch(exception& e) {
3386 errorOut(e, "MothurOut", "getAverage");
3391 /**************************************************************************************************/
// Column-wise standard deviation of `dists`: for each column j the squared
// deviations of dists[iter][j] from the column mean are summed over all rows,
// divided by the number of rows (N, not N-1) and square-rooted.
// The means come from getAverages(dists). The result is sized from dists[0],
// so `dists` must hold at least one row.
// NOTE(review): the return of `stdDev` is not visible in this extract.
3392 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3395 vector<double> averages = getAverages(dists);
3397 //find standard deviation
3398 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3399 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3401 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3402 for (int j = 0; j < dists[thisIter].size(); j++) {
3403 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3406 for (int i = 0; i < stdDev.size(); i++) {
3407 stdDev[i] /= (double) dists.size();
3408 stdDev[i] = sqrt(stdDev[i]);
3413 catch(exception& e) {
// the label must name this function so the error report points at the right place
3414 errorOut(e, "MothurOut", "getStandardDeviation");
3418 /**************************************************************************************************/
// Column-wise standard deviation of `dists` using caller-supplied column
// means `averages`: squared deviations are summed over all rows, divided by
// the number of rows (N, not N-1) and square-rooted.
// The result is sized from dists[0], so `dists` must hold at least one row;
// `averages` is indexed with each row's length and must be at least that long.
// NOTE(review): the return of `stdDev` is not visible in this extract.
3419 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3421 //find standard deviation
3422 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3423 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3425 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3426 for (int j = 0; j < dists[thisIter].size(); j++) {
3427 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3430 for (int i = 0; i < stdDev.size(); i++) {
3431 stdDev[i] /= (double) dists.size();
3432 stdDev[i] = sqrt(stdDev[i]);
3437 catch(exception& e) {
// the label must name this function so the error report points at the right place
3438 errorOut(e, "MothurOut", "getStandardDeviation");
3442 /**************************************************************************************************/
// Combines per-iteration distance tables into one table.
// calcDistsTotals is indexed [iteration][calculator][comparison]. The result
// has the shape of calcDistsTotals[0] and copies seq1/seq2 from it, so at
// least one iteration is required and all iterations are assumed to share
// that shape.
// mode == "average": dist is the mean over iterations.
// any other mode:    dist is the median, taken as the element at index
//                    size/2 of the sorted values (the upper of the two
//                    middle values when the iteration count is even).
// NOTE(review): the declaration of tempDist is not visible in this extract.
3443 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3446 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3447 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3448 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3449 vector<seqDist> temp;
3450 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3452 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3453 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3454 tempDist.dist = 0.0;
3455 temp.push_back(tempDist);
3457 calcAverages.push_back(temp);
3460 if (mode == "average") {
3461 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3462 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3463 for (int j = 0; j < calcAverages[i].size(); j++) {
3464 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3469 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3470 for (int j = 0; j < calcAverages[i].size(); j++) {
3471 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3474 }else { //find median
3475 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3476 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3477 vector<double> dists;
3478 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3479 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3481 sort(dists.begin(), dists.end());
3482 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3487 return calcAverages;
3489 catch(exception& e) {
3490 errorOut(e, "MothurOut", "getAverages");
3494 /**************************************************************************************************/
// Mean over iterations of per-iteration distance tables.
// calcDistsTotals is indexed [iteration][calculator][comparison]. The result
// has the shape of calcDistsTotals[0] and copies seq1/seq2 from it, so at
// least one iteration is required and all iterations are assumed to share
// that shape. Each dist is the sum over iterations divided by the number of
// iterations.
// NOTE(review): the declaration of tempDist is not visible in this extract.
3495 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3498 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3499 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3500 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3501 vector<seqDist> temp;
3502 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3504 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3505 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3506 tempDist.dist = 0.0;
3507 temp.push_back(tempDist);
3509 calcAverages.push_back(temp);
3513 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3514 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3515 for (int j = 0; j < calcAverages[i].size(); j++) {
3516 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3521 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3522 for (int j = 0; j < calcAverages[i].size(); j++) {
3523 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3527 return calcAverages;
3529 catch(exception& e) {
3530 errorOut(e, "MothurOut", "getAverages");
3534 /**************************************************************************************************/
// Standard deviation over iterations of per-iteration distance tables.
// calcDistsTotals is indexed [iteration][calculator][comparison]. The means
// come from getAverages(calcDistsTotals). The result has the shape of
// calcDistsTotals[0] and copies seq1/seq2 from it, so at least one iteration
// is required. For every cell the squared deviations from the mean are
// summed over iterations, divided by the iteration count (N, not N-1) and
// square-rooted.
// NOTE(review): the declaration of tempDist and the return of `stdDev` are
// not visible in this extract.
3535 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3538 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3540 //find standard deviation
3541 vector< vector<seqDist> > stdDev;
3542 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3543 vector<seqDist> temp;
3544 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3546 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3547 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3548 tempDist.dist = 0.0;
3549 temp.push_back(tempDist);
3551 stdDev.push_back(temp);
3554 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3555 for (int i = 0; i < stdDev.size(); i++) {
3556 for (int j = 0; j < stdDev[i].size(); j++) {
3557 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3562 for (int i = 0; i < stdDev.size(); i++) { //divide by the iteration count, then take the square root
3563 for (int j = 0; j < stdDev[i].size(); j++) {
3564 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3565 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3571 catch(exception& e) {
// the label must name this function so the error report points at the right place
3572 errorOut(e, "MothurOut", "getStandardDeviation");
3576 /**************************************************************************************************/
// Standard deviation over iterations of per-iteration distance tables, using
// caller-supplied means `calcAverages`.
// calcDistsTotals is indexed [iteration][calculator][comparison]. The result
// has the shape of calcDistsTotals[0] and copies seq1/seq2 from it, so at
// least one iteration is required; `calcAverages` must cover that shape.
// For every cell the squared deviations from the mean are summed over
// iterations, divided by the iteration count (N, not N-1) and square-rooted.
// NOTE(review): the declaration of tempDist and the return of `stdDev` are
// not visible in this extract.
3577 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3579 //find standard deviation
3580 vector< vector<seqDist> > stdDev;
3581 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3582 vector<seqDist> temp;
3583 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3585 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3586 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3587 tempDist.dist = 0.0;
3588 temp.push_back(tempDist);
3590 stdDev.push_back(temp);
3593 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3594 for (int i = 0; i < stdDev.size(); i++) {
3595 for (int j = 0; j < stdDev[i].size(); j++) {
3596 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3601 for (int i = 0; i < stdDev.size(); i++) { //divide by the iteration count, then take the square root
3602 for (int j = 0; j < stdDev[i].size(); j++) {
3603 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3604 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3610 catch(exception& e) {
// the label must name this function so the error report points at the right place
3611 errorOut(e, "MothurOut", "getStandardDeviation");
3616 /**************************************************************************************************/
// Returns false at the first character of `input` that is not an ASCII digit
// (character codes 48..57, i.e. '0'..'9'). Signs and decimal points are
// therefore rejected.
// NOTE(review): the final return is not visible here; presumably true, which
// would also make the empty string pass -- TODO confirm.
3617 bool MothurOut::isContainingOnlyDigits(string input) {
3620 //are you a digit in ascii code
3621 for (int i = 0;i < input.length(); i++){
3622 if( input[i]>47 && input[i]<58){}
3623 else { return false; }
3628 catch(exception& e) {
3629 errorOut(e, "MothurOut", "isContainingOnlyDigits");
3633 /**************************************************************************************************/
// Walks the ';'-terminated taxa in `tax` one at a time. For each taxon, when
// the text between its last '(' and last ')' passes isNumeric1(), everything
// from that '(' onward is cut off the taxon, e.g. "Bacteria(100)" becomes
// "Bacteria". `tax` is consumed from the front as the loop advances.
// Returns 0 early when control_pressed is set.
// NOTE(review): the guards on pos/pos2, the reassembly of the cleaned string
// into `tax` and the normal return value are not visible in this extract.
// isNumeric1() also accepts an empty string, so "()" counts as a confidence.
3634 int MothurOut::removeConfidences(string& tax) {
3640 while (tax.find_first_of(';') != -1) {
3642 if (control_pressed) { return 0; }
3645 taxon = tax.substr(0,tax.find_first_of(';'));
3647 int pos = taxon.find_last_of('(');
3650 int pos2 = taxon.find_last_of(')');
3652 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
3653 if (isNumeric1(confidenceScore)) {
3654 taxon = taxon.substr(0, pos); //rip off confidence
3660 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
3668 catch(exception& e) {
3669 errorOut(e, "MothurOut", "removeConfidences");
3673 /**************************************************************************************************/
// Copies `tax` into newTax, dropping every single-quote and double-quote
// character.
// When control_pressed is set, the partial result built so far is returned.
// NOTE(review): the declaration of newTax and the final return (presumably
// newTax) are not visible in this extract.
3674 string MothurOut::removeQuotes(string tax) {
3680 for (int i = 0; i < tax.length(); i++) {
3682 if (control_pressed) { return newTax; }
3684 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
3690 catch(exception& e) {
3691 errorOut(e, "MothurOut", "removeQuotes");
3695 /**************************************************************************************************/
3696 // Computes the standard deviation of the integer values in `featureVector`.
// The mean is computed first; the squared deviations from it are then summed,
// divided by the number of elements (N, not N-1) and square-rooted.
// NOTE(review): an empty vector divides by zero; the declarations of
// `average` and `stdDev` and the return are not visible in this extract.
3697 double MothurOut::getStandardDeviation(vector<int>& featureVector){
3701 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
3702 average /= (double) featureVector.size();
3704 //find standard deviation
3706 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
3707 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
3710 stdDev /= (double) featureVector.size();
3711 stdDev = sqrt(stdDev);
3715 catch(exception& e) {
3716 errorOut(e, "MothurOut", "getStandardDeviation");
3720 /**************************************************************************************************/